Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

Bugfixes, and reworking of the atomics documentation.

# gpg: Signature made Mon 13 Apr 2020 07:56:22 BST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  module: increase dirs array size by one
  memory: Do not allow direct write access to rom_device regions
  vl.c: error out if -mem-path is used together with -M memory-backend
  rcu: do not mention atomic_mb_read/set in documentation
  atomics: update documentation
  atomics: convert to reStructuredText
  oslib-posix: take lock before qemu_cond_broadcast
  piix: fix xenfv regression, add compat machine xenfv-4.2

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/.github/lockdown.yml b/.github/lockdown.yml
new file mode 100644
index 0000000..9acc393
--- /dev/null
+++ b/.github/lockdown.yml
@@ -0,0 +1,34 @@
+# Configuration for Repo Lockdown - https://github.com/dessant/repo-lockdown
+
+# Close issues and pull requests
+close: true
+
+# Lock issues and pull requests
+lock: true
+
+issues:
+  comment: |
+    Thank you for your interest in the QEMU project.
+
+    This repository is a read-only mirror of the project's master
+    repostories hosted on https://git.qemu.org/git/qemu.git.
+    The project does not process issues filed on GitHub.
+
+    The project issues are tracked on Launchpad:
+    https://bugs.launchpad.net/qemu
+
+    QEMU welcomes bug report contributions. You can file new ones on:
+    https://bugs.launchpad.net/qemu/+filebug
+
+pulls:
+  comment: |
+    Thank you for your interest in the QEMU project.
+
+    This repository is a read-only mirror of the project's master
+    repostories hosted on https://git.qemu.org/git/qemu.git.
+    The project does not process merge requests filed on GitHub.
+
+    QEMU welcomes contributions of code (either fixing bugs or adding new
+    functionality). However, we get a lot of patches, and so we have some
+    guidelines about contributing on the project website:
+    https://www.qemu.org/contribute/
diff --git a/.travis.yml b/.travis.yml
index e0c7221..2fd63ec 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -318,8 +318,10 @@
       env:
         - CONFIG="--enable-tools --target-list=aarch64-softmmu,alpha-softmmu,arm-softmmu,m68k-softmmu,microblaze-softmmu,mips-softmmu,mips64el-softmmu,nios2-softmmu,or1k-softmmu,ppc-softmmu,ppc64-softmmu,s390x-softmmu,sparc-softmmu,x86_64-softmmu,xtensa-softmmu"
         - TEST_CMD="make check-acceptance"
+        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-acceptance"
       after_script:
         - python3 -c 'import json; r = json.load(open("tests/results/latest/results.json")); [print(t["logfile"]) for t in r["tests"] if t["status"] not in ("PASS", "SKIP")]' | xargs cat
+        - du -chs $HOME/avocado/data/cache
       addons:
         apt:
           packages:
diff --git a/MAINTAINERS b/MAINTAINERS
index 9d156d7..642c8e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -440,6 +440,7 @@
 F: hw/char/xen_console.c
 F: hw/display/xenfb.c
 F: hw/net/xen_nic.c
+F: hw/usb/xen-usb.c
 F: hw/block/xen*
 F: hw/block/dataplane/xen*
 F: hw/xen/
@@ -2833,6 +2834,7 @@
 M: Fam Zheng <fam@euphon.net>
 R: Philippe Mathieu-Daudé <philmd@redhat.com>
 S: Maintained
+F: .github/lockdown.yml
 F: .travis.yml
 F: scripts/travis/
 F: .shippable.yml
diff --git a/VERSION b/VERSION
index 94dbafd..d250277 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-4.2.91
+4.2.92
diff --git a/audio/audio.c b/audio/audio.c
index 9ac9a20..7a9e680 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1491,16 +1491,14 @@
 
 size_t audio_generic_read(HWVoiceIn *hw, void *buf, size_t size)
 {
-    size_t src_size, copy_size;
-    void *src = hw->pcm_ops->get_buffer_in(hw, &src_size);
-    copy_size = MIN(size, src_size);
+    void *src = hw->pcm_ops->get_buffer_in(hw, &size);
 
-    memcpy(buf, src, copy_size);
-    hw->pcm_ops->put_buffer_in(hw, src, copy_size);
-    return copy_size;
+    memcpy(buf, src, size);
+    hw->pcm_ops->put_buffer_in(hw, src, size);
+
+    return size;
 }
 
-
 static int audio_driver_init(AudioState *s, struct audio_driver *drv,
                              bool msg, Audiodev *dev)
 {
diff --git a/audio/dsoundaudio.c b/audio/dsoundaudio.c
index bd57082..4cdf19a 100644
--- a/audio/dsoundaudio.c
+++ b/audio/dsoundaudio.c
@@ -279,7 +279,7 @@
         return -1;
     }
 
-    if (*statusp & DSERR_BUFFERLOST) {
+    if (*statusp & DSBSTATUS_BUFFERLOST) {
         dsound_restore_out(dsb, s);
         return -1;
     }
@@ -540,7 +540,12 @@
     }
 
     req_size = audio_ring_dist(cpos, hw->pos_emul, hw->size_emul);
-    req_size = MIN(req_size, hw->size_emul - hw->pos_emul);
+    req_size = MIN(*size, MIN(req_size, hw->size_emul - hw->pos_emul));
+
+    if (req_size == 0) {
+        *size = 0;
+        return NULL;
+    }
 
     err = dsound_lock_in(dscb, &hw->info, hw->pos_emul, req_size, &ret, NULL,
                          &act_size, NULL, false, ds->s);
diff --git a/block/backup.c b/block/backup.c
index 7430ca5..a7a7dca 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -126,11 +126,7 @@
 static void backup_clean(Job *job)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
-    AioContext *aio_context = bdrv_get_aio_context(s->backup_top);
-
-    aio_context_acquire(aio_context);
     bdrv_backup_top_drop(s->backup_top);
-    aio_context_release(aio_context);
 }
 
 void backup_do_checkpoint(BlockJob *job, Error **errp)
diff --git a/block/block-backend.c b/block/block-backend.c
index 8b8f2a8..38ae413 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1140,16 +1140,22 @@
     return 0;
 }
 
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 {
+    assert(blk->in_flight > 0);
+
     if (blk->quiesce_counter && !blk->disable_request_queuing) {
+        blk_dec_in_flight(blk);
         qemu_co_queue_wait(&blk->queued_requests, NULL);
+        blk_inc_in_flight(blk);
     }
 }
 
-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
-                               unsigned int bytes, QEMUIOVector *qiov,
-                               BdrvRequestFlags flags)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret;
     BlockDriverState *bs;
@@ -1178,10 +1184,24 @@
     return ret;
 }
 
-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
-                                     unsigned int bytes,
-                                     QEMUIOVector *qiov, size_t qiov_offset,
-                                     BdrvRequestFlags flags)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+                               unsigned int bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags)
+{
+    int ret;
+
+    blk_inc_in_flight(blk);
+    ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
+    blk_dec_in_flight(blk);
+
+    return ret;
+}
+
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
+                    QEMUIOVector *qiov, size_t qiov_offset,
+                    BdrvRequestFlags flags)
 {
     int ret;
     BlockDriverState *bs;
@@ -1214,6 +1234,20 @@
     return ret;
 }
 
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+                                     unsigned int bytes,
+                                     QEMUIOVector *qiov, size_t qiov_offset,
+                                     BdrvRequestFlags flags)
+{
+    int ret;
+
+    blk_inc_in_flight(blk);
+    ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
+    blk_dec_in_flight(blk);
+
+    return ret;
+}
+
 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                 unsigned int bytes, QEMUIOVector *qiov,
                                 BdrvRequestFlags flags)
@@ -1234,7 +1268,7 @@
     BlkRwCo *rwco = opaque;
     QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
                               qiov, rwco->flags);
     aio_wait_kick();
 }
@@ -1244,8 +1278,8 @@
     BlkRwCo *rwco = opaque;
     QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
-                               qiov, rwco->flags);
+    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
+                                    qiov, 0, rwco->flags);
     aio_wait_kick();
 }
 
@@ -1262,6 +1296,7 @@
         .ret    = NOT_DONE,
     };
 
+    blk_inc_in_flight(blk);
     if (qemu_in_coroutine()) {
         /* Fast-path if already in coroutine context */
         co_entry(&rwco);
@@ -1270,6 +1305,7 @@
         bdrv_coroutine_enter(blk_bs(blk), co);
         BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
     }
+    blk_dec_in_flight(blk);
 
     return rwco.ret;
 }
@@ -1387,14 +1423,8 @@
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
-    if (rwco->blk->quiesce_counter) {
-        blk_dec_in_flight(rwco->blk);
-        blk_wait_while_drained(rwco->blk);
-        blk_inc_in_flight(rwco->blk);
-    }
-
     assert(qiov->size == acb->bytes);
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
+    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
                               qiov, rwco->flags);
     blk_aio_complete(acb);
 }
@@ -1405,15 +1435,9 @@
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
-    if (rwco->blk->quiesce_counter) {
-        blk_dec_in_flight(rwco->blk);
-        blk_wait_while_drained(rwco->blk);
-        blk_inc_in_flight(rwco->blk);
-    }
-
     assert(!qiov || qiov->size == acb->bytes);
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
-                               qiov, rwco->flags);
+    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
+                                    qiov, 0, rwco->flags);
     blk_aio_complete(acb);
 }
 
@@ -1488,38 +1512,6 @@
                         blk_aio_write_entry, flags, cb, opaque);
 }
 
-static void blk_aio_flush_entry(void *opaque)
-{
-    BlkAioEmAIOCB *acb = opaque;
-    BlkRwCo *rwco = &acb->rwco;
-
-    rwco->ret = blk_co_flush(rwco->blk);
-    blk_aio_complete(acb);
-}
-
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
-}
-
-static void blk_aio_pdiscard_entry(void *opaque)
-{
-    BlkAioEmAIOCB *acb = opaque;
-    BlkRwCo *rwco = &acb->rwco;
-
-    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
-    blk_aio_complete(acb);
-}
-
-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
-                             int64_t offset, int bytes,
-                             BlockCompletionFunc *cb, void *opaque)
-{
-    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
-                        cb, opaque);
-}
-
 void blk_aio_cancel(BlockAIOCB *acb)
 {
     bdrv_aio_cancel(acb);
@@ -1530,7 +1522,9 @@
     bdrv_aio_cancel_async(acb);
 }
 
-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
     blk_wait_while_drained(blk);
 
@@ -1546,8 +1540,7 @@
     BlkRwCo *rwco = opaque;
     QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             qiov->iov[0].iov_base);
+    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
     aio_wait_kick();
 }
 
@@ -1561,7 +1554,7 @@
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
 
     blk_aio_complete(acb);
 }
@@ -1572,7 +1565,9 @@
     return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
 }
 
-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 {
     int ret;
 
@@ -1586,7 +1581,50 @@
     return bdrv_co_pdiscard(blk->root, offset, bytes);
 }
 
-int blk_co_flush(BlockBackend *blk)
+static void blk_aio_pdiscard_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+    blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
+                             int64_t offset, int bytes,
+                             BlockCompletionFunc *cb, void *opaque)
+{
+    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
+                        cb, opaque);
+}
+
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+{
+    int ret;
+
+    blk_inc_in_flight(blk);
+    ret = blk_do_pdiscard(blk, offset, bytes);
+    blk_dec_in_flight(blk);
+
+    return ret;
+}
+
+static void blk_pdiscard_entry(void *opaque)
+{
+    BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
+    aio_wait_kick();
+}
+
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+{
+    return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
+}
+
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn blk_do_flush(BlockBackend *blk)
 {
     blk_wait_while_drained(blk);
 
@@ -1597,10 +1635,36 @@
     return bdrv_co_flush(blk_bs(blk));
 }
 
+static void blk_aio_flush_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_do_flush(rwco->blk);
+    blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+                          BlockCompletionFunc *cb, void *opaque)
+{
+    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
+}
+
+int coroutine_fn blk_co_flush(BlockBackend *blk)
+{
+    int ret;
+
+    blk_inc_in_flight(blk);
+    ret = blk_do_flush(blk);
+    blk_dec_in_flight(blk);
+
+    return ret;
+}
+
 static void blk_flush_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
-    rwco->ret = blk_co_flush(rwco->blk);
+    rwco->ret = blk_do_flush(rwco->blk);
     aio_wait_kick();
 }
 
@@ -2083,20 +2147,6 @@
     return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
 }
 
-static void blk_pdiscard_entry(void *opaque)
-{
-    BlkRwCo *rwco = opaque;
-    QEMUIOVector *qiov = rwco->iobuf;
-
-    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
-    aio_wait_kick();
-}
-
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
-{
-    return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
-}
-
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size)
 {
diff --git a/block/qcow2.c b/block/qcow2.c
index 2bb536b..b524b0c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3784,6 +3784,12 @@
     int ret;
     BDRVQcow2State *s = bs->opaque;
 
+    /* If the image does not support QCOW_OFLAG_ZERO then discarding
+     * clusters could expose stale data from the backing file. */
+    if (s->qcow_version < 3 && bs->backing) {
+        return -ENOTSUP;
+    }
+
     if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
         assert(bytes < s->cluster_size);
         /* Ignore partial clusters, except for the special case of the
@@ -4349,6 +4355,11 @@
         return -EINVAL;
     }
 
+    if (offset_into_cluster(s, bytes) &&
+        (offset + bytes) != (bs->total_sectors << BDRV_SECTOR_BITS)) {
+        return -EINVAL;
+    }
+
     while (bytes && aio_task_pool_status(aio) == 0) {
         uint64_t chunk_size = MIN(bytes, s->cluster_size);
 
diff --git a/block/replication.c b/block/replication.c
index 413d954..da013c2 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -144,12 +144,15 @@
 static void replication_close(BlockDriverState *bs)
 {
     BDRVReplicationState *s = bs->opaque;
+    Job *commit_job;
 
     if (s->stage == BLOCK_REPLICATION_RUNNING) {
         replication_stop(s->rs, false, NULL);
     }
     if (s->stage == BLOCK_REPLICATION_FAILOVER) {
-        job_cancel_sync(&s->commit_job->job);
+        commit_job = &s->commit_job->job;
+        assert(commit_job->aio_context == qemu_get_current_aio_context());
+        job_cancel_sync(commit_job);
     }
 
     if (s->mode == REPLICATION_MODE_SECONDARY) {
diff --git a/block/vpc.c b/block/vpc.c
index 6df75e2..d8141b5 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -835,7 +835,7 @@
 
     /* Write the footer (twice: at the beginning and at the end) */
     block_size = 0x200000;
-    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
+    num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512);
 
     ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
     if (ret < 0) {
diff --git a/blockdev.c b/blockdev.c
index fa8630c..5faddaa 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3612,7 +3612,16 @@
     }
 
     trace_qmp_block_job_finalize(job);
+    job_ref(&job->job);
     job_finalize(&job->job, errp);
+
+    /*
+     * Job's context might have changed via job_finalize (and job_txn_apply
+     * automatically acquires the new one), so make sure we release the correct
+     * one.
+     */
+    aio_context = blk_get_aio_context(job->blk);
+    job_unref(&job->job);
     aio_context_release(aio_context);
 }
 
diff --git a/configure b/configure
index 22870f3..233c671 100755
--- a/configure
+++ b/configure
@@ -2119,7 +2119,7 @@
 fi
 
 if test "$static" = "yes"; then
-  if test "$pie" != "no" && compile_prog "-fPIE -DPIE" "-static-pie"; then
+  if test "$pie" != "no" && compile_prog "-Werror -fPIE -DPIE" "-static-pie"; then
     QEMU_CFLAGS="-fPIE -DPIE $QEMU_CFLAGS"
     QEMU_LDFLAGS="-static-pie $QEMU_LDFLAGS"
     pie="yes"
@@ -2132,7 +2132,7 @@
 elif test "$pie" = "no"; then
   QEMU_CFLAGS="$CFLAGS_NOPIE $QEMU_CFLAGS"
   QEMU_LDFLAGS="$LDFLAGS_NOPIE $QEMU_LDFLAGS"
-elif compile_prog "-fPIE -DPIE" "-pie"; then
+elif compile_prog "-Werror -fPIE -DPIE" "-pie"; then
   QEMU_CFLAGS="-fPIE -DPIE $QEMU_CFLAGS"
   QEMU_LDFLAGS="-pie $QEMU_LDFLAGS"
   pie="yes"
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 301ce3b..ae6ba71 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -5856,6 +5856,9 @@
         zSig1 = 0;
         zSig0 = aSig + bSig;
         if ( aExp == 0 ) {
+            if (zSig0 == 0) {
+                return packFloatx80(zSign, 0, 0);
+            }
             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
             goto roundAndPack;
         }
diff --git a/gdbstub.c b/gdbstub.c
index 013fb1a..171e150 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2060,8 +2060,8 @@
         /* Print the CPU model and name in multiprocess mode */
         ObjectClass *oc = object_get_class(OBJECT(cpu));
         const char *cpu_model = object_class_get_name(oc);
-        g_autofree char *cpu_name;
-        cpu_name  = object_get_canonical_path_component(OBJECT(cpu));
+        g_autofree char *cpu_name =
+            object_get_canonical_path_component(OBJECT(cpu));
         g_string_printf(rs, "%s %s [%s]", cpu_model, cpu_name,
                         cpu->halted ? "halted " : "running");
     } else {
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 288a87a..5f8f157 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -64,6 +64,8 @@
     AioContext *ctx;
 };
 
+static int xen_block_send_response(XenBlockRequest *request);
+
 static void reset_request(XenBlockRequest *request)
 {
     memset(&request->req, 0, sizeof(request->req));
@@ -115,23 +117,26 @@
     return request;
 }
 
-static void xen_block_finish_request(XenBlockRequest *request)
+static void xen_block_complete_request(XenBlockRequest *request)
 {
     XenBlockDataPlane *dataplane = request->dataplane;
 
+    if (xen_block_send_response(request)) {
+        Error *local_err = NULL;
+
+        xen_device_notify_event_channel(dataplane->xendev,
+                                        dataplane->event_channel,
+                                        &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+        }
+    }
+
     QLIST_REMOVE(request, list);
     dataplane->requests_inflight--;
-}
-
-static void xen_block_release_request(XenBlockRequest *request)
-{
-    XenBlockDataPlane *dataplane = request->dataplane;
-
-    QLIST_REMOVE(request, list);
     reset_request(request);
     request->dataplane = dataplane;
     QLIST_INSERT_HEAD(&dataplane->freelist, request, list);
-    dataplane->requests_inflight--;
 }
 
 /*
@@ -246,7 +251,6 @@
 }
 
 static int xen_block_do_aio(XenBlockRequest *request);
-static int xen_block_send_response(XenBlockRequest *request);
 
 static void xen_block_complete_aio(void *opaque, int ret)
 {
@@ -286,7 +290,6 @@
     }
 
     request->status = request->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
-    xen_block_finish_request(request);
 
     switch (request->req.operation) {
     case BLKIF_OP_WRITE:
@@ -306,17 +309,8 @@
     default:
         break;
     }
-    if (xen_block_send_response(request)) {
-        Error *local_err = NULL;
 
-        xen_device_notify_event_channel(dataplane->xendev,
-                                        dataplane->event_channel,
-                                        &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        }
-    }
-    xen_block_release_request(request);
+    xen_block_complete_request(request);
 
     if (dataplane->more_work) {
         qemu_bh_schedule(dataplane->bh);
@@ -420,8 +414,8 @@
     return 0;
 
 err:
-    xen_block_finish_request(request);
     request->status = BLKIF_RSP_ERROR;
+    xen_block_complete_request(request);
     return -1;
 }
 
@@ -575,17 +569,7 @@
                 break;
             };
 
-            if (xen_block_send_response(request)) {
-                Error *local_err = NULL;
-
-                xen_device_notify_event_channel(dataplane->xendev,
-                                                dataplane->event_channel,
-                                                &local_err);
-                if (local_err) {
-                    error_report_err(local_err);
-                }
-            }
-            xen_block_release_request(request);
+            xen_block_complete_request(request);
             continue;
         }
 
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 07bb32e..99cb4c6 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -860,7 +860,7 @@
     XenBlockIOThread *iothread = g_new(XenBlockIOThread, 1);
     Error *local_err = NULL;
     QDict *opts;
-    QObject *ret_data;
+    QObject *ret_data = NULL;
 
     iothread->id = g_strdup(id);
 
diff --git a/hw/core/loader.c b/hw/core/loader.c
index eeef6da..8bbb179 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1447,6 +1447,7 @@
     uint32_t current_rom_index;
     uint32_t rom_start_address;
     AddressSpace *as;
+    bool complete;
 } HexParser;
 
 /* return size or -1 if error */
@@ -1484,6 +1485,7 @@
                                   parser->current_rom_index,
                                   parser->rom_start_address, parser->as);
         }
+        parser->complete = true;
         return parser->total_size;
     case EXT_SEG_ADDR_RECORD:
     case EXT_LINEAR_ADDR_RECORD:
@@ -1548,11 +1550,12 @@
         .bin_buf = g_malloc(hex_blob_size),
         .start_addr = addr,
         .as = as,
+        .complete = false
     };
 
     rom_transaction_begin();
 
-    for (; hex_blob < end; ++hex_blob) {
+    for (; hex_blob < end && !parser.complete; ++hex_blob) {
         switch (*hex_blob) {
         case '\r':
         case '\n':
diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index 42e8231..23a8ae0 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -53,12 +53,20 @@
             s->vga.vbe_start_addr, surface_data(ds), surface_stride(ds),
             surface_bits_per_pixel(ds),
             (s->regs.dp_mix & GMC_ROP3_MASK) >> 16);
-    int dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
-                 s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width);
-    int dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
-                 s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height);
+    unsigned dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
+                      s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width);
+    unsigned dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
+                      s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height);
     int bpp = ati_bpp_from_datatype(s);
+    if (!bpp) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Invalid bpp\n");
+        return;
+    }
     int dst_stride = DEFAULT_CNTL ? s->regs.dst_pitch : s->regs.default_pitch;
+    if (!dst_stride) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Zero dest pitch\n");
+        return;
+    }
     uint8_t *dst_bits = s->vga.vram_ptr + (DEFAULT_CNTL ?
                         s->regs.dst_offset : s->regs.default_offset);
 
@@ -82,12 +90,16 @@
     switch (s->regs.dp_mix & GMC_ROP3_MASK) {
     case ROP3_SRCCOPY:
     {
-        int src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
-                     s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
-        int src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
-                     s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height);
+        unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
+                       s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
+        unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
+                       s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height);
         int src_stride = DEFAULT_CNTL ?
                          s->regs.src_pitch : s->regs.default_pitch;
+        if (!src_stride) {
+            qemu_log_mask(LOG_GUEST_ERROR, "Zero source pitch\n");
+            return;
+        }
         uint8_t *src_bits = s->vga.vram_ptr + (DEFAULT_CNTL ?
                             s->regs.src_offset : s->regs.default_offset);
 
@@ -137,8 +149,10 @@
                                     dst_y * surface_stride(ds),
                                     s->regs.dst_height * surface_stride(ds));
         }
-        s->regs.dst_x += s->regs.dst_width;
-        s->regs.dst_y += s->regs.dst_height;
+        s->regs.dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
+                         dst_x + s->regs.dst_width : dst_x);
+        s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
+                         dst_y + s->regs.dst_height : dst_y);
         break;
     }
     case ROP3_PATCOPY:
@@ -179,7 +193,8 @@
                                     dst_y * surface_stride(ds),
                                     s->regs.dst_height * surface_stride(ds));
         }
-        s->regs.dst_y += s->regs.dst_height;
+        s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
+                         dst_y + s->regs.dst_height : dst_y);
         break;
     }
     default:
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 854cd3a..0d1f411 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -1047,6 +1047,10 @@
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, payload_name);
+    if (!filename) {
+        error_report("could not find firmware/kernel file '%s'", payload_name);
+        exit(1);
+    }
 
     payload_size = load_elf(filename, NULL, NULL, NULL,
                             &bios_entry, &loadaddr, NULL, NULL,
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index b75ad06..c9cb6fa 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -571,10 +571,29 @@
 
 static void pnv_reset(MachineState *machine)
 {
+    PnvMachineState *pnv = PNV_MACHINE(machine);
+    IPMIBmc *bmc;
     void *fdt;
 
     qemu_devices_reset();
 
+    /*
+     * The machine should provide by default an internal BMC simulator.
+     * If not, try to use the BMC device that was provided on the command
+     * line.
+     */
+    bmc = pnv_bmc_find(&error_fatal);
+    if (!pnv->bmc) {
+        if (!bmc) {
+            warn_report("machine has no BMC device. Use '-device "
+                        "ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10' "
+                        "to define one");
+        } else {
+            pnv_bmc_set_pnor(bmc, pnv->pnor);
+            pnv->bmc = bmc;
+        }
+    }
+
     fdt = pnv_dt_create(machine);
 
     /* Pack resulting tree */
@@ -833,9 +852,6 @@
     }
     g_free(chip_typename);
 
-    /* Create the machine BMC simulator */
-    pnv->bmc = pnv_bmc_create(pnv->pnor);
-
     /* Instantiate ISA bus on chip 0 */
     pnv->isa_bus = pnv_isa_create(pnv->chips[0], &error_fatal);
 
@@ -845,8 +861,14 @@
     /* Create an RTC ISA device too */
     mc146818_rtc_init(pnv->isa_bus, 2000, NULL);
 
-    /* Create the IPMI BT device for communication with the BMC */
-    pnv_ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10);
+    /*
+     * Create the machine BMC simulator and the IPMI BT device for
+     * communication with the BMC
+     */
+    if (defaults_enabled()) {
+        pnv->bmc = pnv_bmc_create(pnv->pnor);
+        pnv_ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10);
+    }
 
     /*
      * OpenPOWER systems use a IPMI SEL Event message to notify the
diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c
index 8863354..4e018b8 100644
--- a/hw/ppc/pnv_bmc.c
+++ b/hw/ppc/pnv_bmc.c
@@ -213,6 +213,18 @@
     .cmd_handlers = hiomap_cmds
 };
 
+
+void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor)
+{
+    object_ref(OBJECT(pnor));
+    object_property_add_const_link(OBJECT(bmc), "pnor", OBJECT(pnor),
+                                   &error_abort);
+
+    /* Install the HIOMAP protocol handlers to access the PNOR */
+    ipmi_sim_register_netfn(IPMI_BMC_SIMULATOR(bmc), IPMI_NETFN_OEM,
+                            &hiomap_netfn);
+}
+
 /*
  * Instantiate the machine BMC. PowerNV uses the QEMU internal
  * simulator but it could also be external.
@@ -232,3 +244,36 @@
 
     return IPMI_BMC(obj);
 }
+
+typedef struct ForeachArgs {
+    const char *name;
+    Object *obj;
+} ForeachArgs;
+
+static int bmc_find(Object *child, void *opaque)
+{
+    ForeachArgs *args = opaque;
+
+    if (object_dynamic_cast(child, args->name)) {
+        if (args->obj) {
+            return 1;
+        }
+        args->obj = child;
+    }
+    return 0;
+}
+
+IPMIBmc *pnv_bmc_find(Error **errp)
+{
+    ForeachArgs args = { TYPE_IPMI_BMC_SIMULATOR, NULL };
+    int ret;
+
+    ret = object_child_foreach_recursive(object_get_root(), bmc_find, &args);
+    if (ret) {
+        error_setg(errp, "machine should have only one BMC device. "
+                   "Use '-nodefaults'");
+        return NULL;
+    }
+
+    return args.obj ? IPMI_BMC(args.obj) : NULL;
+}
diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index d5ea962..b30e093 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -13,7 +13,6 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
-#include "qemu/main-loop.h"
 #include "qemu/module.h"
 #include "cpu.h"
 #include "hw/irq.h"
@@ -1183,9 +1182,7 @@
     case PEGPL_CFGMSK:
         s->cfg_mask = val;
         size = ~(val & 0xfffffffe) + 1;
-        qemu_mutex_lock_iothread();
         pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size);
-        qemu_mutex_unlock_iothread();
         break;
     case PEGPL_MSGBAH:
         s->msg_base = ((uint64_t)val << 32) | (s->msg_base & 0xffffffff);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 679ae79..eb54f94 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -517,9 +517,10 @@
     }
 
     if (kvm_enabled()) {
-        if (kvmppc_set_fwnmi() < 0) {
-            error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) "
-                             "not supported by KVM");
+        if (!kvmppc_get_fwnmi()) {
+            error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts(FWNMI) not supported by KVM.");
+            error_append_hint(errp, "Try appending -machine cap-fwnmi=off\n");
         }
     }
 }
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index a4a540f..1069d01 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -833,11 +833,28 @@
     /* get rtas addr from fdt */
     rtas_addr = spapr_get_rtas_addr();
     if (!rtas_addr) {
-        qemu_system_guest_panicked(NULL);
+        if (!recovered) {
+            error_report(
+"FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
+            qemu_system_guest_panicked(NULL);
+        } else {
+            warn_report(
+"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. "
+"Machine check recovered.");
+        }
         g_free(ext_elog);
         return;
     }
 
+    /*
+     * By taking the interlock, we assume that the MCE will be
+     * delivered to the guest. CAUTION: don't add anything that could
+     * prevent the MCE to be delivered after this line, otherwise the
+     * guest won't be able to release the interlock and ultimately
+     * hang/crash?
+     */
+    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
+
     stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
                 env->gpr[3]);
     cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
@@ -860,17 +877,13 @@
     Error *local_err = NULL;
 
     if (spapr->fwnmi_machine_check_addr == -1) {
-        /*
-         * This implies that we have hit a machine check either when the
-         * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
-         * called) or between system reset and "ibm,nmi-register".
-         * Fall back to the old machine check behavior in such cases.
-         */
+        /* Non-FWNMI case, deliver it like an architected CPU interrupt. */
         cs->exception_index = POWERPC_EXCP_MCHECK;
         ppc_cpu_do_interrupt(cs);
         return;
     }
 
+    /* Wait for FWNMI interlock. */
     while (spapr->fwnmi_machine_check_interlock != -1) {
         /*
          * Check whether the same CPU got machine check error
@@ -878,12 +891,25 @@
          * that CPU called "ibm,nmi-interlock")
          */
         if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
-            qemu_system_guest_panicked(NULL);
+            if (!recovered) {
+                error_report(
+"FWNMI: Unable to deliver machine check to guest: nested machine check.");
+                qemu_system_guest_panicked(NULL);
+            } else {
+                warn_report(
+"FWNMI: Unable to deliver machine check to guest: nested machine check. "
+"Machine check recovered.");
+            }
             return;
         }
         qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
-        /* Meanwhile if the system is reset, then just return */
         if (spapr->fwnmi_machine_check_addr == -1) {
+            /*
+             * If the machine was reset while waiting for the interlock,
+             * abort the delivery. The machine check applies to a context
+             * that no longer exists, so it wouldn't make sense to deliver
+             * it now.
+             */
             return;
         }
     }
@@ -894,12 +920,13 @@
          * We don't want to abort so we let the migration to continue.
          * In a rare case, the machine check handler will run on the target.
          * Though this is not preferable, it is better than aborting
-         * the migration or killing the VM.
+         * the migration or killing the VM. It is okay to call
+         * migrate_del_blocker on a blocker that was not added (which the
+         * nmi-interlock handler would do when it's called after this).
          */
         warn_report("Received a fwnmi while migration was in progress");
     }
 
-    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
     spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 709a527..55ca9de 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1663,6 +1663,7 @@
 
         if (pc->is_bridge) {
             error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
+            return;
         }
 
         /* ensure any other present functions are pending unplug */
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 9fb8c86..bcac0d0 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -437,6 +437,13 @@
         return;
     }
 
+    if (kvm_enabled()) {
+        if (kvmppc_set_fwnmi() < 0) {
+            rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+            return;
+        }
+    }
+
     spapr->fwnmi_system_reset_addr = sreset_addr;
     spapr->fwnmi_machine_check_addr = mce_addr;
 
@@ -455,6 +462,9 @@
     }
 
     if (spapr->fwnmi_machine_check_addr == -1) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+"FWNMI: ibm,nmi-interlock RTAS called with FWNMI not registered.\n");
+
         /* NMI register not called */
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 1fc2f32..961190d 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -347,13 +347,11 @@
     return -ESHUTDOWN;
 }
 
-static void usbback_packet_complete(USBPacket *packet)
+static void usbback_packet_complete(struct usbback_req *usbback_req)
 {
-    struct usbback_req *usbback_req;
+    USBPacket *packet = &usbback_req->packet;
     int32_t status;
 
-    usbback_req = container_of(packet, struct usbback_req, packet);
-
     QTAILQ_REMOVE(&usbback_req->stub->submit_q, usbback_req, q);
 
     status = usbback_xlat_status(packet->status);
@@ -566,7 +564,7 @@
 
     usb_handle_packet(usbback_req->stub->dev, &usbback_req->packet);
     if (usbback_req->packet.status != USB_RET_ASYNC) {
-        usbback_packet_complete(&usbback_req->packet);
+        usbback_packet_complete(usbback_req);
     }
     return;
 
@@ -993,7 +991,7 @@
 
     usbif = usbback_req->usbif;
     TR_REQ(&usbif->xendev, "\n");
-    usbback_packet_complete(packet);
+    usbback_packet_complete(usbback_req);
 }
 
 static USBPortOps xen_usb_port_ops = {
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 33692fc..2900bd1 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -147,7 +147,7 @@
 {
     int ret = 0;
     IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
-    uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr);
+    uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask;
     unsigned entries, bits_total, bits_per_level, max_levels;
     struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) };
     long rampagesize = qemu_minrampagesize();
@@ -159,8 +159,8 @@
     if (pagesize > rampagesize) {
         pagesize = rampagesize;
     }
-    pagesize = 1ULL << (63 - clz64(container->pgsizes &
-                                   (pagesize | (pagesize - 1))));
+    pgmask = container->pgsizes & (pagesize | (pagesize - 1));
+    pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0;
     if (!pagesize) {
         error_report("Host doesn't support page size 0x%"PRIx64
                      ", the supported mask is 0x%lx",
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index afeeb18..716d263 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -26,6 +26,7 @@
 #define QEMU_AIO_WAIT_H
 
 #include "block/aio.h"
+#include "qemu/main-loop.h"
 
 /**
  * AioWait:
@@ -124,4 +125,25 @@
  */
 void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
 
+/**
+ * in_aio_context_home_thread:
+ * @ctx: the aio context
+ *
+ * Return whether we are running in the thread that normally runs @ctx.  Note
+ * that acquiring/releasing ctx does not affect the outcome, each AioContext
+ * still only has one home thread that is responsible for running it.
+ */
+static inline bool in_aio_context_home_thread(AioContext *ctx)
+{
+    if (ctx == qemu_get_current_aio_context()) {
+        return true;
+    }
+
+    if (ctx == qemu_get_aio_context()) {
+        return qemu_mutex_iothread_locked();
+    } else {
+        return false;
+    }
+}
+
 #endif /* QEMU_AIO_WAIT_H */
diff --git a/include/block/aio.h b/include/block/aio.h
index cb19891..62ed954 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -133,12 +133,16 @@
     AioHandlerList deleted_aio_handlers;
 
     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
-     * accessed with atomic primitives.  If this field is 0, everything
-     * (file descriptors, bottom halves, timers) will be re-evaluated
-     * before the next blocking poll(), thus the event_notifier_set call
-     * can be skipped.  If it is non-zero, you may need to wake up a
-     * concurrent aio_poll or the glib main event loop, making
-     * event_notifier_set necessary.
+     * only written from the AioContext home thread, or under the BQL in
+     * the case of the main AioContext.  However, it is read from any
+     * thread so it is still accessed with atomic primitives.
+     *
+     * If this field is 0, everything (file descriptors, bottom halves,
+     * timers) will be re-evaluated before the next blocking poll() or
+     * io_uring wait; therefore, the event_notifier_set call can be
+     * skipped.  If it is non-zero, you may need to wake up a concurrent
+     * aio_poll or the glib main event loop, making event_notifier_set
+     * necessary.
      *
      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
      * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
@@ -682,19 +686,6 @@
 AioContext *qemu_get_current_aio_context(void);
 
 /**
- * in_aio_context_home_thread:
- * @ctx: the aio context
- *
- * Return whether we are running in the thread that normally runs @ctx.  Note
- * that acquiring/releasing ctx does not affect the outcome, each AioContext
- * still only has one home thread that is responsible for running it.
- */
-static inline bool in_aio_context_home_thread(AioContext *ctx)
-{
-    return ctx == qemu_get_current_aio_context();
-}
-
-/**
  * aio_context_setup:
  * @ctx: the aio context
  *
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index a1411bf..e0bb47b 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -104,19 +104,21 @@
         : ((sym0->st_value > sym1->st_value) ? 1 : 0);
 }
 
-static int glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
-                                  int clear_lsb, symbol_fn_t sym_cb)
+static void glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
+                                   int clear_lsb, symbol_fn_t sym_cb)
 {
-    struct elf_shdr *symtab, *strtab, *shdr_table = NULL;
-    struct elf_sym *syms = NULL;
+    struct elf_shdr *symtab, *strtab;
+    g_autofree struct elf_shdr *shdr_table = NULL;
+    g_autofree struct elf_sym *syms = NULL;
+    g_autofree char *str = NULL;
     struct syminfo *s;
     int nsyms, i;
-    char *str = NULL;
 
     shdr_table = load_at(fd, ehdr->e_shoff,
                          sizeof(struct elf_shdr) * ehdr->e_shnum);
-    if (!shdr_table)
-        return -1;
+    if (!shdr_table) {
+        return ;
+    }
 
     if (must_swab) {
         for (i = 0; i < ehdr->e_shnum; i++) {
@@ -125,23 +127,25 @@
     }
 
     symtab = glue(find_section, SZ)(shdr_table, ehdr->e_shnum, SHT_SYMTAB);
-    if (!symtab)
-        goto fail;
+    if (!symtab) {
+        return;
+    }
     syms = load_at(fd, symtab->sh_offset, symtab->sh_size);
-    if (!syms)
-        goto fail;
+    if (!syms) {
+        return;
+    }
 
     nsyms = symtab->sh_size / sizeof(struct elf_sym);
 
     /* String table */
     if (symtab->sh_link >= ehdr->e_shnum) {
-        goto fail;
+        return;
     }
     strtab = &shdr_table[symtab->sh_link];
 
     str = load_at(fd, strtab->sh_offset, strtab->sh_size);
     if (!str) {
-        goto fail;
+        return;
     }
 
     i = 0;
@@ -170,8 +174,13 @@
         }
         i++;
     }
-    syms = g_realloc(syms, nsyms * sizeof(*syms));
 
+    /* check we have symbols left */
+    if (nsyms == 0) {
+        return;
+    }
+
+    syms = g_realloc(syms, nsyms * sizeof(*syms));
     qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ));
     for (i = 0; i < nsyms - 1; i++) {
         if (syms[i].st_size == 0) {
@@ -182,18 +191,11 @@
     /* Commit */
     s = g_malloc0(sizeof(*s));
     s->lookup_symbol = glue(lookup_symbol, SZ);
-    glue(s->disas_symtab.elf, SZ) = syms;
+    glue(s->disas_symtab.elf, SZ) = g_steal_pointer(&syms);
     s->disas_num_syms = nsyms;
-    s->disas_strtab = str;
+    s->disas_strtab = g_steal_pointer(&str);
     s->next = syminfos;
     syminfos = s;
-    g_free(shdr_table);
-    return 0;
- fail:
-    g_free(syms);
-    g_free(str);
-    g_free(shdr_table);
-    return -1;
 }
 
 static int glue(elf_reloc, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index fb4d0c0..d4b0b0e 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -241,6 +241,8 @@
 void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt);
 void pnv_bmc_powerdown(IPMIBmc *bmc);
 IPMIBmc *pnv_bmc_create(PnvPnor *pnor);
+IPMIBmc *pnv_bmc_find(Error **errp);
+void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor);
 
 /*
  * POWER8 MMIO base addresses
diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h
new file mode 100644
index 0000000..8382c4c
--- /dev/null
+++ b/include/qemu/selfmap.h
@@ -0,0 +1,44 @@
+/*
+ * Utility functions to read our own memory map
+ *
+ * Copyright (c) 2020 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _SELFMAP_H_
+#define _SELFMAP_H_
+
+typedef struct {
+    unsigned long start;
+    unsigned long end;
+
+    /* flags */
+    bool is_read;
+    bool is_write;
+    bool is_exec;
+    bool is_priv;
+
+    unsigned long offset;
+    gchar *dev;
+    uint64_t inode;
+    gchar *path;
+} MapInfo;
+
+
+/**
+ * read_self_maps:
+ *
+ * Read /proc/self/maps and return a list of MapInfo structures.
+ */
+GSList *read_self_maps(void);
+
+/**
+ * free_self_maps:
+ * @info: a GSlist
+ *
+ * Free a list of MapInfo structures.
+ */
+void free_self_maps(GSList *info);
+
+#endif /* _SELFMAP_H_ */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index b198dec..9bbdbd6 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -171,7 +171,6 @@
                              BlockCompletionFunc *cb, void *opaque);
 void blk_aio_cancel(BlockAIOCB *acb);
 void blk_aio_cancel_async(BlockAIOCB *acb);
-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque);
diff --git a/job-qmp.c b/job-qmp.c
index fecc939..f9a5883 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -114,7 +114,16 @@
     }
 
     trace_qmp_job_finalize(job);
+    job_ref(job);
     job_finalize(job, errp);
+
+    /*
+     * Job's context might have changed via job_finalize (and job_txn_apply
+     * automatically acquires the new one), so make sure we release the correct
+     * one.
+     */
+    aio_context = job->aio_context;
+    job_unref(job);
     aio_context_release(aio_context);
 }
 
diff --git a/job.c b/job.c
index 134a07b..53be57a 100644
--- a/job.c
+++ b/job.c
@@ -136,17 +136,38 @@
     }
 }
 
-static int job_txn_apply(JobTxn *txn, int fn(Job *))
+static int job_txn_apply(Job *job, int fn(Job *))
 {
-    Job *job, *next;
+    AioContext *inner_ctx;
+    Job *other_job, *next;
+    JobTxn *txn = job->txn;
     int rc = 0;
 
-    QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
-        rc = fn(job);
+    /*
+     * Similar to job_completed_txn_abort, we take each job's lock before
+     * applying fn, but since we assume that outer_ctx is held by the caller,
+     * we need to release it here to avoid holding the lock twice - which would
+     * break AIO_WAIT_WHILE from within fn.
+     */
+    job_ref(job);
+    aio_context_release(job->aio_context);
+
+    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
+        inner_ctx = other_job->aio_context;
+        aio_context_acquire(inner_ctx);
+        rc = fn(other_job);
+        aio_context_release(inner_ctx);
         if (rc) {
             break;
         }
     }
+
+    /*
+     * Note that job->aio_context might have been changed by calling fn, so we
+     * can't use a local variable to cache it.
+     */
+    aio_context_acquire(job->aio_context);
+    job_unref(job);
     return rc;
 }
 
@@ -774,11 +795,11 @@
     assert(job && job->txn);
 
     /* prepare the transaction to complete */
-    rc = job_txn_apply(job->txn, job_prepare);
+    rc = job_txn_apply(job, job_prepare);
     if (rc) {
         job_completed_txn_abort(job);
     } else {
-        job_txn_apply(job->txn, job_finalize_single);
+        job_txn_apply(job, job_finalize_single);
     }
 }
 
@@ -824,10 +845,10 @@
         assert(other_job->ret == 0);
     }
 
-    job_txn_apply(txn, job_transition_to_pending);
+    job_txn_apply(job, job_transition_to_pending);
 
     /* If no jobs need manual finalization, automatically do so */
-    if (job_txn_apply(txn, job_needs_finalize) == 0) {
+    if (job_txn_apply(job, job_needs_finalize) == 0) {
         job_do_finalize(job);
     }
 }
@@ -849,9 +870,10 @@
 static void job_exit(void *opaque)
 {
     Job *job = (Job *)opaque;
-    AioContext *ctx = job->aio_context;
+    AioContext *ctx;
 
-    aio_context_acquire(ctx);
+    job_ref(job);
+    aio_context_acquire(job->aio_context);
 
     /* This is a lie, we're not quiescent, but still doing the completion
      * callbacks. However, completion callbacks tend to involve operations that
@@ -862,6 +884,14 @@
 
     job_completed(job);
 
+    /*
+     * Note that calling job_completed can move the job to a different
+     * aio_context, so we cannot cache from above. job_txn_apply takes care of
+     * acquiring the new lock, and we ref/unref to avoid job_completed freeing
+     * the job underneath us.
+     */
+    ctx = job->aio_context;
+    job_unref(job);
     aio_context_release(ctx);
 }
 
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 8198be0..619c054 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2172,6 +2172,8 @@
 
         /* Check to see if the address is valid.  */
         if (host_start && real_start != current_start) {
+            qemu_log_mask(CPU_LOG_PAGE, "invalid %lx && %lx != %lx\n",
+                          host_start, real_start, current_start);
             goto try_again;
         }
 
@@ -2240,7 +2242,11 @@
          * probably a bad strategy if not, which means we got here
          * because of trouble with ARM commpage setup.
          */
-        munmap((void *)real_start, real_size);
+        if (munmap((void *)real_start, real_size) != 0) {
+            error_report("%s: failed to unmap %lx:%lx (%s)", __func__,
+                         real_start, real_size, strerror(errno));
+            abort();
+        }
         current_start += align;
         if (host_start == current_start) {
             /* Theoretically possible if host doesn't have any suitably
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5af55fc..6495ddc 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -117,6 +117,7 @@
 
 #include "qemu.h"
 #include "qemu/guest-random.h"
+#include "qemu/selfmap.h"
 #include "user/syscall-trace.h"
 #include "qapi/error.h"
 #include "fd-trans.h"
@@ -7232,58 +7233,61 @@
 {
     CPUState *cpu = env_cpu((CPUArchState *)cpu_env);
     TaskState *ts = cpu->opaque;
-    FILE *fp;
-    char *line = NULL;
-    size_t len = 0;
-    ssize_t read;
+    GSList *map_info = read_self_maps();
+    GSList *s;
+    int count;
 
-    fp = fopen("/proc/self/maps", "r");
-    if (fp == NULL) {
-        return -1;
-    }
+    for (s = map_info; s; s = g_slist_next(s)) {
+        MapInfo *e = (MapInfo *) s->data;
 
-    while ((read = getline(&line, &len, fp)) != -1) {
-        int fields, dev_maj, dev_min, inode;
-        uint64_t min, max, offset;
-        char flag_r, flag_w, flag_x, flag_p;
-        char path[512] = "";
-        fields = sscanf(line, "%"PRIx64"-%"PRIx64" %c%c%c%c %"PRIx64" %x:%x %d"
-                        " %512s", &min, &max, &flag_r, &flag_w, &flag_x,
-                        &flag_p, &offset, &dev_maj, &dev_min, &inode, path);
-
-        if ((fields < 10) || (fields > 11)) {
-            continue;
-        }
-        if (h2g_valid(min)) {
+        if (h2g_valid(e->start)) {
+            unsigned long min = e->start;
+            unsigned long max = e->end;
             int flags = page_get_flags(h2g(min));
-            max = h2g_valid(max - 1) ? max : (uintptr_t)g2h(GUEST_ADDR_MAX) + 1;
+            const char *path;
+
+            max = h2g_valid(max - 1) ?
+                max : (uintptr_t) g2h(GUEST_ADDR_MAX) + 1;
+
             if (page_check_range(h2g(min), max - min, flags) == -1) {
                 continue;
             }
+
             if (h2g(min) == ts->info->stack_limit) {
-                pstrcpy(path, sizeof(path), "      [stack]");
+                path = "[stack]";
+            } else {
+                path = e->path;
             }
-            dprintf(fd, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr
-                    " %c%c%c%c %08" PRIx64 " %02x:%02x %d %s%s\n",
-                    h2g(min), h2g(max - 1) + 1, flag_r, flag_w,
-                    flag_x, flag_p, offset, dev_maj, dev_min, inode,
-                    path[0] ? "         " : "", path);
+
+            count = dprintf(fd, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr
+                            " %c%c%c%c %08" PRIx64 " %s %"PRId64,
+                            h2g(min), h2g(max - 1) + 1,
+                            e->is_read ? 'r' : '-',
+                            e->is_write ? 'w' : '-',
+                            e->is_exec ? 'x' : '-',
+                            e->is_priv ? 'p' : '-',
+                            (uint64_t) e->offset, e->dev, e->inode);
+            if (path) {
+                dprintf(fd, "%*s%s\n", 73 - count, "", path);
+            } else {
+                dprintf(fd, "\n");
+            }
         }
     }
 
+    free_self_maps(map_info);
+
 #ifdef TARGET_VSYSCALL_PAGE
     /*
      * We only support execution from the vsyscall page.
      * This is as if CONFIG_LEGACY_VSYSCALL_XONLY=y from v5.3.
      */
-    dprintf(fd, TARGET_FMT_lx "-" TARGET_FMT_lx
-            " --xp 00000000 00:00 0 [vsyscall]\n",
-            TARGET_VSYSCALL_PAGE, TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE);
+    count = dprintf(fd, TARGET_FMT_lx "-" TARGET_FMT_lx
+                    " --xp 00000000 00:00 0",
+                    TARGET_VSYSCALL_PAGE, TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE);
+    dprintf(fd, "%*s%s\n", 73 - count, "",  "[vsyscall]");
 #endif
 
-    free(line);
-    fclose(fp);
-
     return 0;
 }
 
@@ -11331,11 +11335,11 @@
            This is a hint, so ignoring and returning success is ok.  */
         return 0;
 #endif
-#if TARGET_ABI_BITS == 32
+#ifdef TARGET_NR_fcntl64
     case TARGET_NR_fcntl64:
     {
-	int cmd;
-	struct flock64 fl;
+        int cmd;
+        struct flock64 fl;
         from_flock64_fn *copyfrom = copy_from_user_flock64;
         to_flock64_fn *copyto = copy_to_user_flock64;
 
@@ -11346,7 +11350,7 @@
         }
 #endif
 
-	cmd = target_to_host_fcntl_cmd(arg2);
+        cmd = target_to_host_fcntl_cmd(arg2);
         if (cmd == -TARGET_EINVAL) {
             return cmd;
         }
diff --git a/pc-bios/README b/pc-bios/README
index f54c274..a5a770f 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -14,7 +14,7 @@
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
   https://github.com/aik/SLOF, and the image currently in qemu is
-  built from git tag qemu-slof-20200317.
+  built from git tag qemu-slof-20200327.
 
 - sgabios (the Serial Graphics Adapter option ROM) provides a means for
   legacy x86 software to communicate with an attached serial console as
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index 40499a1..80bbf91 100644
--- a/pc-bios/slof.bin
+++ b/pc-bios/slof.bin
Binary files differ
diff --git a/qemu-img.c b/qemu-img.c
index b167376..821cbf6 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1924,8 +1924,8 @@
         if (status == BLK_DATA && !copy_range) {
             ret = convert_co_read(s, sector_num, n, buf);
             if (ret < 0) {
-                error_report("error while reading sector %" PRId64
-                             ": %s", sector_num, strerror(-ret));
+                error_report("error while reading at byte %lld: %s",
+                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
                 s->ret = ret;
             }
         } else if (!s->min_sparse && status == BLK_ZERO) {
@@ -1953,8 +1953,8 @@
                 ret = convert_co_write(s, sector_num, n, buf, status);
             }
             if (ret < 0) {
-                error_report("error while writing sector %" PRId64
-                             ": %s", sector_num, strerror(-ret));
+                error_report("error while writing at byte %lld: %s",
+                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
                 s->ret = ret;
             }
         }
diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
index 47dd7ea..b93d97b 100644
--- a/qobject/json-streamer.c
+++ b/qobject/json-streamer.c
@@ -85,7 +85,7 @@
     g_queue_push_tail(&parser->tokens, token);
 
     if ((parser->brace_count > 0 || parser->bracket_count > 0)
-        && parser->bracket_count >= 0 && parser->bracket_count >= 0) {
+        && parser->brace_count >= 0 && parser->bracket_count >= 0) {
         return;
     }
 
diff --git a/roms/SLOF b/roms/SLOF
index ab6984f..8e012d6 160000
--- a/roms/SLOF
+++ b/roms/SLOF
@@ -1 +1 @@
-Subproject commit ab6984f5a6d054e1f634dda855b32e5357111974
+Subproject commit 8e012d6fddb62be833d746cef3f03e6c8beecde0
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 597f72b..03d0667 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -88,6 +88,7 @@
 static int cap_ppc_count_cache_flush_assist;
 static int cap_ppc_nested_kvm_hv;
 static int cap_large_decr;
+static int cap_fwnmi;
 
 static uint32_t debug_inst_opcode;
 
@@ -136,6 +137,7 @@
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
     cap_large_decr = kvmppc_get_dec_bits();
+    cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
     /*
      * Note: setting it to false because there is not such capability
      * in KVM at this moment.
@@ -2064,6 +2066,11 @@
     }
 }
 
+bool kvmppc_get_fwnmi(void)
+{
+    return cap_fwnmi;
+}
+
 int kvmppc_set_fwnmi(void)
 {
     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 332fa0a..fcaf745 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+bool kvmppc_get_fwnmi(void);
 int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_error_append_smt_possible_hint(Error *const *errp);
@@ -163,6 +164,11 @@
 {
 }
 
+static inline bool kvmppc_get_fwnmi(void)
+{
+    return false;
+}
+
 static inline int kvmppc_set_fwnmi(void)
 {
     return -1;
diff --git a/target/rx/translate.c b/target/rx/translate.c
index b3d7305..61e8665 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -2362,6 +2362,7 @@
         break;
     case DISAS_UPDATE:
         tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);
+        /* fall through */
     case DISAS_EXIT:
         tcg_gen_exit_tb(NULL, 0);
         break;
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index c0d69fa..7a46dcc 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -213,6 +213,9 @@
 #define MEMCTL_IL0EN 0x1
 
 #define MAX_INSN_LENGTH 64
+#define MAX_INSNBUF_LENGTH \
+    ((MAX_INSN_LENGTH + sizeof(xtensa_insnbuf_word) - 1) / \
+     sizeof(xtensa_insnbuf_word))
 #define MAX_INSN_SLOTS 32
 #define MAX_OPCODE_ARGS 16
 #define MAX_NAREG 64
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
index 376a61f..7073381 100644
--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -96,6 +96,7 @@
 
     config->isa = xtensa_isa_init(config->isa_internal, NULL, NULL);
     assert(xtensa_isa_maxlength(config->isa) <= MAX_INSN_LENGTH);
+    assert(xtensa_insnbuf_size(config->isa) <= MAX_INSNBUF_LENGTH);
     opcodes = xtensa_isa_num_opcodes(config->isa);
     formats = xtensa_isa_num_formats(config->isa);
     regfiles = xtensa_isa_num_regfiles(config->isa);
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index 8aa972c..e0beaf7 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -72,8 +72,8 @@
     unsigned cpenable;
 
     uint32_t op_flags;
-    xtensa_insnbuf insnbuf;
-    xtensa_insnbuf slotbuf;
+    xtensa_insnbuf_word insnbuf[MAX_INSNBUF_LENGTH];
+    xtensa_insnbuf_word slotbuf[MAX_INSNBUF_LENGTH];
 };
 
 static TCGv_i32 cpu_pc;
@@ -1173,11 +1173,6 @@
     dc->cwoe = tb_flags & XTENSA_TBFLAG_CWOE;
     dc->callinc = ((tb_flags & XTENSA_TBFLAG_CALLINC_MASK) >>
                    XTENSA_TBFLAG_CALLINC_SHIFT);
-
-    if (dc->config->isa) {
-        dc->insnbuf = xtensa_insnbuf_alloc(dc->config->isa);
-        dc->slotbuf = xtensa_insnbuf_alloc(dc->config->isa);
-    }
     init_sar_tracker(dc);
 }
 
@@ -1267,10 +1262,6 @@
     DisasContext *dc = container_of(dcbase, DisasContext, base);
 
     reset_sar_tracker(dc);
-    if (dc->config->isa) {
-        xtensa_insnbuf_free(dc->config->isa, dc->insnbuf);
-        xtensa_insnbuf_free(dc->config->isa, dc->slotbuf);
-    }
     if (dc->icount) {
         tcg_temp_free(dc->next_icount);
     }
@@ -3741,7 +3732,7 @@
         .name = "pfwait.a",
         .translate = translate_nop,
     }, {
-        .name = "pfwait.o",
+        .name = "pfwait.r",
         .translate = translate_nop,
     }, {
         .name = "pitlb",
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 7f61eee..ec083bd 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -3737,7 +3737,7 @@
         } else {
             /* Choose R12 because, as a base, it requires a SIB byte. */
             x86_guest_base_index = TCG_REG_R12;
-            tcg_out_mov(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base);
+            tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base);
             tcg_regset_set_reg(s->reserved_regs, x86_guest_base_index);
         }
     }
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index 1da663c..4d32ebc 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -404,11 +404,11 @@
 
     /* MIPS r6 introduced names for weaker variants of SYNC.  These are
        backward compatible to previous architecture revisions.  */
-    OPC_SYNC_WMB     = OPC_SYNC | 0x04 << 5,
-    OPC_SYNC_MB      = OPC_SYNC | 0x10 << 5,
-    OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 5,
-    OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 5,
-    OPC_SYNC_RMB     = OPC_SYNC | 0x13 << 5,
+    OPC_SYNC_WMB     = OPC_SYNC | 0x04 << 6,
+    OPC_SYNC_MB      = OPC_SYNC | 0x10 << 6,
+    OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 6,
+    OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 6,
+    OPC_SYNC_RMB     = OPC_SYNC | 0x13 << 6,
 
     /* Aliases for convenience.  */
     ALIAS_PADD     = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
diff --git a/tests/acceptance/machine_sparc_leon3.py b/tests/acceptance/machine_sparc_leon3.py
index f77e210..2405cd7 100644
--- a/tests/acceptance/machine_sparc_leon3.py
+++ b/tests/acceptance/machine_sparc_leon3.py
@@ -7,12 +7,16 @@
 
 from avocado_qemu import Test
 from avocado_qemu import wait_for_console_pattern
+from avocado import skip
 
 
 class Leon3Machine(Test):
 
     timeout = 60
 
+    @skip("Test currently broken")
+    # A Window Underflow exception occurs before booting the kernel,
+    # and QEMU exit calling cpu_abort(), which makes this test to fail.
     def test_leon3_helenos_uimage(self):
         """
         :avocado: tags=arch:sparc
diff --git a/tests/acceptance/migration.py b/tests/acceptance/migration.py
index a8367ca..0365289 100644
--- a/tests/acceptance/migration.py
+++ b/tests/acceptance/migration.py
@@ -70,8 +70,8 @@
 
     @skipUnless(find_command('nc', default=False), "'nc' command not found")
     def test_migration_with_exec(self):
-        """
-        The test works for both netcat-traditional and netcat-openbsd packages
-        """
+        """The test works for both netcat-traditional and netcat-openbsd packages."""
         free_port = self._get_free_port()
         dest_uri = 'exec:nc -l localhost %u' % free_port
+        src_uri = 'exec:nc localhost %u' % free_port
+        self.do_migrate(dest_uri, src_uri)
diff --git a/tests/acceptance/ppc_prep_40p.py b/tests/acceptance/ppc_prep_40p.py
index b27572f..1515561 100644
--- a/tests/acceptance/ppc_prep_40p.py
+++ b/tests/acceptance/ppc_prep_40p.py
@@ -30,11 +30,12 @@
         :avocado: tags=machine:40p
         :avocado: tags=slowness:high
         """
-        bios_url = ('ftp://ftp.boulder.ibm.com/rs6000/firmware/'
+        bios_url = ('http://ftpmirror.your.org/pub/misc/'
+                    'ftp.software.ibm.com/rs6000/firmware/'
                     '7020-40p/P12H0456.IMG')
         bios_hash = '1775face4e6dc27f3a6ed955ef6eb331bf817f03'
         bios_path = self.fetch_asset(bios_url, asset_hash=bios_hash)
-        drive_url = ('https://ftp.netbsd.org/pub/NetBSD/NetBSD-archive/'
+        drive_url = ('https://cdn.netbsd.org/pub/NetBSD/NetBSD-archive/'
                      'NetBSD-4.0/prep/installation/floppy/generic_com0.fs')
         drive_hash = 'dbcfc09912e71bd5f0d82c7c1ee43082fb596ceb'
         drive_path = self.fetch_asset(drive_url, asset_hash=drive_hash)
@@ -66,7 +67,7 @@
         :avocado: tags=arch:ppc
         :avocado: tags=machine:40p
         """
-        drive_url = ('https://ftp.netbsd.org/pub/NetBSD/iso/7.1.2/'
+        drive_url = ('https://cdn.netbsd.org/pub/NetBSD/iso/7.1.2/'
                      'NetBSD-7.1.2-prep.iso')
         drive_hash = 'ac6fa2707d888b36d6fa64de6e7fe48e'
         drive_path = self.fetch_asset(drive_url, asset_hash=drive_hash,
diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046
index a066eec..ecbe5fc 100755
--- a/tests/qemu-iotests/046
+++ b/tests/qemu-iotests/046
@@ -193,8 +193,8 @@
 verify_io()
 {
     if ($QEMU_IMG info -U -f "$IMGFMT" "$TEST_IMG" | grep "compat: 0.10" > /dev/null); then
-        # For v2 images, discarded clusters are read from the backing file
-        # Keep the variable empty so that the backing file value can be used as
+        # In v2 images clusters are not discarded when there is a backing file.
+        # Keep the variable empty so that the previous value can be used as
         # the default below
         discarded=
     else
@@ -230,14 +230,16 @@
     echo read -P 70 0x78000 0x6000
     echo read -P 7  0x7e000 0x2000
 
-    echo read -P ${discarded:-8} 0x80000 0x6000
+    echo read -P ${discarded:-89} 0x80000 0x1000
+    echo read -P ${discarded:-8} 0x81000 0x5000
     echo read -P 80 0x86000 0x2000
     echo read -P ${discarded:-8} 0x88000 0x2000
     echo read -P 81 0x8a000 0xe000
     echo read -P 90 0x98000 0x6000
     echo read -P 9  0x9e000 0x2000
 
-    echo read -P ${discarded:-10} 0xa0000 0x6000
+    echo read -P ${discarded:-109} 0xa0000 0x1000
+    echo read -P ${discarded:-10} 0xa1000 0x5000
     echo read -P 100 0xa6000 0x2000
     echo read -P ${discarded:-10} 0xa8000 0x2000
     echo read -P 101 0xaa000 0xe000
diff --git a/tests/qemu-iotests/046.out b/tests/qemu-iotests/046.out
index ca2c740..7078304 100644
--- a/tests/qemu-iotests/046.out
+++ b/tests/qemu-iotests/046.out
@@ -187,8 +187,10 @@
 24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 8192/8192 bytes at offset 516096
 8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 24576/24576 bytes at offset 524288
-24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 524288
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 20480/20480 bytes at offset 528384
+20 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 8192/8192 bytes at offset 548864
 8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 8192/8192 bytes at offset 557056
@@ -199,8 +201,10 @@
 24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 8192/8192 bytes at offset 647168
 8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 24576/24576 bytes at offset 655360
-24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 655360
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 20480/20480 bytes at offset 659456
+20 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 8192/8192 bytes at offset 679936
 8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 8192/8192 bytes at offset 688128
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 043f129..32c0ecc 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -160,18 +160,16 @@
 
 $QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
 
-# compat=0.10 is required in order to make the following discard actually
-# unallocate the sector rather than make it a zero sector - we want COW, after
-# all.
-_make_test_img -o 'compat=0.10' -b "$BACKING_IMG" 1G
+_make_test_img -b "$BACKING_IMG" 1G
 # Write two clusters, the second one enforces creation of an L2 table after
 # the first data cluster.
 $QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
-# Discard the first cluster. This cluster will soon enough be reallocated and
+# Free the first cluster. This cluster will soon enough be reallocated and
 # used for COW.
-$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
+poke_file "$TEST_IMG" "$(($rb_offset+10))" "\x00\x00"
 # Now, corrupt the image by marking the second L2 table cluster as free.
-poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
+poke_file "$TEST_IMG" "$(($rb_offset+12))" "\x00\x00"
 # Start a write operation requiring COW on the image stopping it right before
 # doing the read; then, trigger the corruption prevention by writing anything to
 # any unallocated cluster, leading to an attempt to overwrite the second L2
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index d27692a..09caaea 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -105,8 +105,6 @@
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 536870912
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-discard 65536/65536 bytes at offset 0
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L2 table); further corruption events will be suppressed
 blkdebug: Suspended request '0'
 write failed: Input/output error
diff --git a/tests/qemu-iotests/177 b/tests/qemu-iotests/177
index 752d29f..eadc2c7 100755
--- a/tests/qemu-iotests/177
+++ b/tests/qemu-iotests/177
@@ -89,8 +89,9 @@
 {
     if ($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" |
 	    grep "compat: 0.10" > /dev/null); then
-        # For v2 images, discarded clusters are read from the backing file
-        discarded=11
+        # In v2 images clusters are not discarded when there is a backing file
+        # so the previous value is read
+        discarded=22
     else
         # Discarded clusters are zeroed for v3 or later
         discarded=0
diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out
index e6f4dc7..56329de 100644
--- a/tests/qemu-iotests/244.out
+++ b/tests/qemu-iotests/244.out
@@ -33,7 +33,7 @@
 Formatting 'TEST_DIR/t.IMGFMT.src', fmt=IMGFMT size=67108864
 wrote 1048576/1048576 bytes at offset 0
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-img: error while writing sector 0: Operation not supported
+qemu-img: error while writing at byte 0: Operation not supported
 
 Convert uncompressed, then write compressed data manually:
 Images are identical.
diff --git a/tests/qemu-iotests/290 b/tests/qemu-iotests/290
new file mode 100755
index 0000000..776b65e
--- /dev/null
+++ b/tests/qemu-iotests/290
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+#
+# Test how 'qemu-io -c discard' behaves on v2 and v3 qcow2 images
+#
+# Copyright (C) 2020 Igalia, S.L.
+# Author: Alberto Garcia <berto@igalia.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=berto@igalia.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1    # failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+_unsupported_imgopts 'compat=0.10' refcount_bits data_file
+
+echo
+echo "### Test 'qemu-io -c discard' on a QCOW2 image without a backing file"
+echo
+for qcow2_compat in 0.10 1.1; do
+    echo "# Create an image with compat=$qcow2_compat without a backing file"
+    _make_test_img -o "compat=$qcow2_compat" 128k
+
+    echo "# Fill all clusters with data and then discard them"
+    $QEMU_IO -c 'write -P 0x01 0 128k' "$TEST_IMG" | _filter_qemu_io
+    $QEMU_IO -c 'discard 0 128k' "$TEST_IMG" | _filter_qemu_io
+
+    echo "# Read the data from the discarded clusters"
+    $QEMU_IO -c 'read -P 0x00 0 128k' "$TEST_IMG" | _filter_qemu_io
+
+    echo "# Output of qemu-img map"
+    $QEMU_IMG map "$TEST_IMG" | _filter_testdir
+done
+
+echo
+echo "### Test 'qemu-io -c discard' on a QCOW2 image with a backing file"
+echo
+
+echo "# Create a backing image and fill it with data"
+BACKING_IMG="$TEST_IMG.base"
+TEST_IMG="$BACKING_IMG" _make_test_img 128k
+$QEMU_IO -c 'write -P 0xff 0 128k' "$BACKING_IMG" | _filter_qemu_io
+
+for qcow2_compat in 0.10 1.1; do
+    echo "# Create an image with compat=$qcow2_compat and a backing file"
+    _make_test_img -o "compat=$qcow2_compat" -b "$BACKING_IMG"
+
+    echo "# Fill all clusters with data and then discard them"
+    $QEMU_IO -c 'write -P 0x01 0 128k' "$TEST_IMG" | _filter_qemu_io
+    $QEMU_IO -c 'discard 0 128k' "$TEST_IMG" | _filter_qemu_io
+
+    echo "# Read the data from the discarded clusters"
+    if [ "$qcow2_compat" = "1.1" ]; then
+        # In qcow2 v3 clusters are zeroed (with QCOW_OFLAG_ZERO)
+        $QEMU_IO -c 'read -P 0x00 0 128k' "$TEST_IMG" | _filter_qemu_io
+    else
+        # In qcow2 v2 if there's a backing image we cannot zero the clusters
+        # without exposing the backing file data so discard does nothing
+        $QEMU_IO -c 'read -P 0x01 0 128k' "$TEST_IMG" | _filter_qemu_io
+    fi
+
+    echo "# Output of qemu-img map"
+    $QEMU_IMG map "$TEST_IMG" | _filter_testdir
+done
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/290.out b/tests/qemu-iotests/290.out
new file mode 100644
index 0000000..d2259c8
--- /dev/null
+++ b/tests/qemu-iotests/290.out
@@ -0,0 +1,61 @@
+QA output created by 290
+
+### Test 'qemu-io -c discard' on a QCOW2 image without a backing file
+
+# Create an image with compat=0.10 without a backing file
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072
+# Fill all clusters with data and then discard them
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Read the data from the discarded clusters
+read 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Output of qemu-img map
+Offset          Length          Mapped to       File
+# Create an image with compat=1.1 without a backing file
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072
+# Fill all clusters with data and then discard them
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Read the data from the discarded clusters
+read 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Output of qemu-img map
+Offset          Length          Mapped to       File
+
+### Test 'qemu-io -c discard' on a QCOW2 image with a backing file
+
+# Create a backing image and fill it with data
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=131072
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Create an image with compat=0.10 and a backing file
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 backing_file=TEST_DIR/t.IMGFMT.base
+# Fill all clusters with data and then discard them
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Read the data from the discarded clusters
+read 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Output of qemu-img map
+Offset          Length          Mapped to       File
+0               0x20000         0x50000         TEST_DIR/t.qcow2
+# Create an image with compat=1.1 and a backing file
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 backing_file=TEST_DIR/t.IMGFMT.base
+# Fill all clusters with data and then discard them
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Read the data from the discarded clusters
+read 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+# Output of qemu-img map
+Offset          Length          Mapped to       File
+*** done
diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern
index 4f5e5bc..4caa5de 100644
--- a/tests/qemu-iotests/common.pattern
+++ b/tests/qemu-iotests/common.pattern
@@ -23,7 +23,7 @@
     local count=$4
 
     for ((i=1;i<=$count;i++)); do
-        echo alloc $(( start + (i - 1) * step )) $size
+        echo "alloc $(( start + (i - 1) * step )) $size"
     done
 }
 
@@ -39,9 +39,9 @@
     local count=$5
     local pattern=$6
 
-    echo === IO: pattern $pattern >&2
+    echo "=== IO: pattern $pattern" >&2
     for ((i=1;i<=$count;i++)); do
-        echo $op -P $pattern $(( start + (i - 1) * step )) $size
+        echo "$op -P $pattern $(( start + (i - 1) * step )) $size"
     done
 }
 
@@ -110,31 +110,31 @@
     #        free - free - compressed
 
     # Write the clusters to be compressed
-    echo === Clusters to be compressed [1]
+    echo '=== Clusters to be compressed [1]'
     io_pattern writev $((offset + 4 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
-    echo === Clusters to be compressed [2]
+    echo '=== Clusters to be compressed [2]'
     io_pattern writev $((offset + 5 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
-    echo === Clusters to be compressed [3]
+    echo '=== Clusters to be compressed [3]'
     io_pattern writev $((offset + 8 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
 
     mv "$TEST_IMG" "$TEST_IMG.orig"
     $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c "$TEST_IMG.orig" "$TEST_IMG"
 
     # Write the used clusters
-    echo === Used clusters [1]
+    echo '=== Used clusters [1]'
     io_pattern writev $((offset + 0 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
-    echo === Used clusters [2]
+    echo '=== Used clusters [2]'
     io_pattern writev $((offset + 1 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
-    echo === Used clusters [3]
+    echo '=== Used clusters [3]'
     io_pattern writev $((offset + 3 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
 
     # Read them
-    echo === Read used/compressed clusters
+    echo '=== Read used/compressed clusters'
     io_pattern readv $((offset + 0 * $cluster_size)) $((2 * $cluster_size)) $((9 * $cluster_size)) $num 165
     io_pattern readv $((offset + 3 * $cluster_size)) $((3 * $cluster_size)) $((9 * $cluster_size)) $num 165
     io_pattern readv $((offset + 8 * $cluster_size)) $((1 * $cluster_size)) $((9 * $cluster_size)) $num 165
 
-    echo === Read zeros
+    echo '=== Read zeros'
     io_zero readv $((offset + 2 * $cluster_size)) $((1 * $cluster_size)) $((9 * $cluster_size)) $num
     io_zero readv $((offset + 6 * $cluster_size)) $((2 * $cluster_size)) $((9 * $cluster_size)) $num
 }
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 79c6dfc..435dccd 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -296,3 +296,4 @@
 286 rw quick
 288 quick
 289 rw quick
+290 rw auto quick
diff --git a/tests/tcg/x86_64/system/boot.S b/tests/tcg/x86_64/system/boot.S
index 205cfbd..73b19a2 100644
--- a/tests/tcg/x86_64/system/boot.S
+++ b/tests/tcg/x86_64/system/boot.S
@@ -41,10 +41,7 @@
 #define XEN_ELFNOTE_PHYS32_ENTRY  18
 
 #define __ASM_FORM(x)	x
-#define __ASM_FORM_RAW(x)     x
-#define __ASM_FORM_COMMA(x) x,
-#define __ASM_SEL(a,b)           __ASM_FORM(b)
-#define __ASM_SEL_RAW(a,b)      __ASM_FORM_RAW(b)
+#define __ASM_SEL(a,b)  __ASM_FORM(b)
 #define _ASM_PTR	__ASM_SEL(.long, .quad)
 
 	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR 0x100000)
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 4eeb184..7519847 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -367,7 +367,9 @@
     aio_poll(qemu_get_aio_context(), true);
     assert(job->status == JOB_STATUS_PENDING);
 
+    aio_context_acquire(job->aio_context);
     job_finalize(job, &error_abort);
+    aio_context_release(job->aio_context);
     assert(job->status == JOB_STATUS_CONCLUDED);
 
     cancel_common(s);
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 6718a38..fe339c2 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -63,3 +63,4 @@
 util-obj-$(CONFIG_GIO) += dbus.o
 dbus.o-cflags = $(GIO_CFLAGS)
 dbus.o-libs = $(GIO_LIBS)
+util-obj-$(CONFIG_USER_ONLY) += selfmap.o
diff --git a/util/aio-posix.c b/util/aio-posix.c
index cd6cf0a..c3613d2 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -559,6 +559,11 @@
     int64_t timeout;
     int64_t start = 0;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
     assert(in_aio_context_home_thread(ctx));
 
     /* aio_notify can avoid the expensive event_notifier_set if
@@ -569,7 +574,13 @@
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.).  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }
 
     qemu_lockcnt_inc(&ctx->list_lock);
@@ -590,7 +601,8 @@
     }
 
     if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
+        /* Finish the poll before clearing the flag.  */
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
         aio_notify_accept(ctx);
     }
 
diff --git a/util/aio-win32.c b/util/aio-win32.c
index a23b9c3..729d533 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -321,6 +321,12 @@
     int count;
     int timeout;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(in_aio_context_home_thread(ctx));
     progress = false;
 
     /* aio_notify can avoid the expensive event_notifier_set if
@@ -331,7 +337,13 @@
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.).  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }
 
     qemu_lockcnt_inc(&ctx->list_lock);
@@ -364,8 +376,7 @@
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
         if (blocking) {
             assert(first);
-            assert(in_aio_context_home_thread(ctx));
-            atomic_sub(&ctx->notify_me, 2);
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
             aio_notify_accept(ctx);
         }
 
diff --git a/util/async.c b/util/async.c
index b94518b..3165a28 100644
--- a/util/async.c
+++ b/util/async.c
@@ -249,7 +249,14 @@
 {
     AioContext *ctx = (AioContext *) source;
 
-    atomic_or(&ctx->notify_me, 1);
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.).  Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();
 
     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -268,7 +275,8 @@
     QEMUBH *bh;
     BHListSlice *s;
 
-    atomic_and(&ctx->notify_me, ~1);
+    /* Finish computing the timeout before clearing the flag.  */
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
     aio_notify_accept(ctx);
 
     QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
@@ -411,10 +419,10 @@
 void aio_notify(AioContext *ctx)
 {
     /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+     * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
-    if (ctx->notify_me) {
+    if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
         atomic_mb_set(&ctx->notified, true);
     }
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index b4d6109..d5a80ed 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -88,7 +88,10 @@
     }
 
     /* No free sqes left, submit pending sqes first */
-    ret = io_uring_submit(ring);
+    do {
+        ret = io_uring_submit(ring);
+    } while (ret == -EINTR);
+
     assert(ret > 1);
     sqe = io_uring_get_sqe(ring);
     assert(sqe);
@@ -282,7 +285,10 @@
 
     fill_sq_ring(ctx);
 
-    ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
+    do {
+        ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
+    } while (ret == -EINTR);
+
     assert(ret >= 0);
 
     return process_cq_ring(ctx, ready_list);
diff --git a/util/selfmap.c b/util/selfmap.c
new file mode 100644
index 0000000..2ec99df
--- /dev/null
+++ b/util/selfmap.c
@@ -0,0 +1,78 @@
+/*
+ * Utility function to get QEMU's own process map
+ *
+ * Copyright (c) 2020 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/selfmap.h"
+
+GSList *read_self_maps(void)
+{
+    gchar *maps;
+    GSList *map_info = NULL;
+
+    if (g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
+        gchar **lines = g_strsplit(maps, "\n", 0);
+        int i, entries = g_strv_length(lines);
+
+        for (i = 0; i < entries; i++) {
+            gchar **fields = g_strsplit(lines[i], " ", 6);
+            if (g_strv_length(fields) > 4) {
+                MapInfo *e = g_new0(MapInfo, 1);
+                int errors;
+                const char *end;
+
+                errors  = qemu_strtoul(fields[0], &end, 16, &e->start);
+                errors += qemu_strtoul(end + 1, NULL, 16, &e->end);
+
+                e->is_read  = fields[1][0] == 'r';
+                e->is_write = fields[1][1] == 'w';
+                e->is_exec  = fields[1][2] == 'x';
+                e->is_priv  = fields[1][3] == 'p';
+
+                errors += qemu_strtoul(fields[2], NULL, 16, &e->offset);
+                e->dev = g_strdup(fields[3]);
+                errors += qemu_strtou64(fields[4], NULL, 10, &e->inode);
+
+                /*
+                 * The last field may have leading spaces which we
+                 * need to strip.
+                 */
+                if (g_strv_length(fields) == 6) {
+                    e->path = g_strdup(g_strchug(fields[5]));
+                }
+                map_info = g_slist_prepend(map_info, e);
+            }
+
+            g_strfreev(fields);
+        }
+        g_strfreev(lines);
+        g_free(maps);
+    }
+
+    /* ensure the map data is in the same order we collected it */
+    return g_slist_reverse(map_info);
+}
+
+/**
+ * free_self_maps:
+ * @info: a GSlist
+ *
+ * Free a list of MapInfo structures.
+ */
+static void free_info(gpointer data)
+{
+    MapInfo *e = (MapInfo *) data;
+    g_free(e->dev);
+    g_free(e->path);
+    g_free(e);
+}
+
+void free_self_maps(GSList *info)
+{
+    g_slist_free_full(info, &free_info);
+}