Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging

Block layer patches

- Fix crash with unaligned prefetch requests (e.g. in stream jobs)
- vdpa-dev: Fix initialisation order to restore VDUSE compatibility
- iotests fixes

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmYCzGQRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9YYtg//XCFC4ANO1wVw6mqByvbPEi4rG9dYiTcz
# WCgS5QOVx8YMsxvyU7xg7vqT3V+7888UFx/cNC58Z5sY3TmohLus/a3Udap3ywpY
# RM+Lg8qwPEyFrmATyoU41sms7q8a8V7pwPm4nHXSP7O3npS/7hRMoET4ZFLJrVUk
# 72oNCHHpLeB8nnEXMvDXfMXmkHv9HthygKXlQBxX/WnjQQZObvfLsrUzk+gqUmzy
# hF9ojN5jrorydI/9lbkHaFkLc6+qOVxQRqrRjjPeKt/SuqJjAhQko0QU1jWjzcm9
# 5W7hQQluVB6B7Eol/ujOzSp6wxabxT/HRq2kwLwuWW6qpNNKIECCxrUyFR4WWEt9
# TI3DDsniq/bbd+CBtL1t68PhN9S7gGorA8UPfwfgp75N3BmEPhe10BEmazpjvLCC
# zTmJYgpvDOJdwhM3loeli/2CA9l4xF0jTQWfry1vaaddC4wQKmK+mJaDhCvns/RL
# MPCMaZ0kuGKdFSAIqshbffEaTdOk5liuQ5l45AtUyXkZh+mcR8tjmb59RfnNCZRc
# 2N8MvDF03RyUVplD3fsnapiTc+Yfzkdxc93dUGybfolvPecp+xrgQojrTyDOhCTY
# ZQXjgEPNkkhMukcnRWPrG2BzQxXKeUODTFetUTjpsvGCt2RttW4EuAP000JMd7Tl
# DziVCvmn6c0=
# =OIUm
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 26 Mar 2024 13:23:48 GMT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
  iotests: add test for stream job with an unaligned prefetch read
  block-backend: fix edge case in bdrv_next_cleanup() where BDS associated to BB changes
  block-backend: fix edge case in bdrv_next() where BDS associated to BB changes
  block/io: accept NULL qiov in bdrv_pad_request
  vdpa-dev: Fix initialisation order to restore VDUSE compatibility
  tests/qemu-iotests: Test 157 and 227 require virtio-blk

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

commit: 5107022a616247216a7f7338bd7c62b4399d89eb [log] [tgz]
author: Peter Maydell <peter.maydell@linaro.org> Tue Mar 26 15:50:19 2024 +0000
committer: Peter Maydell <peter.maydell@linaro.org> Tue Mar 26 15:50:19 2024 +0000
tree: 58f6a9106e9cfa68f4c6f5d23d94b962ac041393
parent: 096ae430a7b5a704af4cd94dca7200d6cb069991 [diff]
parent: 12d7b3bbd3333cededd3b695501d8d247239d769 [diff]
diff --git a/block/block-backend.c b/block/block-backend.c
index 9c4de79..db6f9b9 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c

@@ -599,14 +599,14 @@
     /* Must be called from the main loop */
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 
+    old_bs = it->bs;
+
     /* First, return all root nodes of BlockBackends. In order to avoid
      * returning a BDS twice when multiple BBs refer to it, we only return it
      * if the BB is the first one in the parent list of the BDS. */
     if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
         BlockBackend *old_blk = it->blk;
 
-        old_bs = old_blk ? blk_bs(old_blk) : NULL;
-
         do {
             it->blk = blk_all_next(it->blk);
             bs = it->blk ? blk_bs(it->blk) : NULL;
@@ -620,11 +620,10 @@
         if (bs) {
             bdrv_ref(bs);
             bdrv_unref(old_bs);
+            it->bs = bs;
             return bs;
         }
         it->phase = BDRV_NEXT_MONITOR_OWNED;
-    } else {
-        old_bs = it->bs;
     }
 
     /* Then return the monitor-owned BDSes without a BB attached. Ignore all
@@ -664,13 +663,10 @@
     /* Must be called from the main loop */
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 
-    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
-        if (it->blk) {
-            bdrv_unref(blk_bs(it->blk));
-            blk_unref(it->blk);
-        }
-    } else {
-        bdrv_unref(it->bs);
+    bdrv_unref(it->bs);
+
+    if (it->phase == BDRV_NEXT_BACKEND_ROOTS && it->blk) {
+        blk_unref(it->blk);
     }
 
     bdrv_next_reset(it);

diff --git a/block/io.c b/block/io.c
index 33150c0..395bea3 100644
--- a/block/io.c
+++ b/block/io.c

@@ -1726,22 +1726,29 @@
         return 0;
     }
 
-    sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
-                                  &sliced_head, &sliced_tail,
-                                  &sliced_niov);
+    /*
+     * For prefetching in stream_populate(), no qiov is passed along, because
+     * only copy-on-read matters.
+     */
+    if (qiov && *qiov) {
+        sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+                                      &sliced_head, &sliced_tail,
+                                      &sliced_niov);
 
-    /* Guaranteed by bdrv_check_request32() */
-    assert(*bytes <= SIZE_MAX);
-    ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
-                                  sliced_head, *bytes);
-    if (ret < 0) {
-        bdrv_padding_finalize(pad);
-        return ret;
+        /* Guaranteed by bdrv_check_request32() */
+        assert(*bytes <= SIZE_MAX);
+        ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+                                      sliced_head, *bytes);
+        if (ret < 0) {
+            bdrv_padding_finalize(pad);
+            return ret;
+        }
+        *qiov = &pad->local_qiov;
+        *qiov_offset = 0;
     }
+
     *bytes += pad->head + pad->tail;
     *offset -= pad->head;
-    *qiov = &pad->local_qiov;
-    *qiov_offset = 0;
     if (padded) {
         *padded = true;
     }

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e8e1661..fd1a937 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c

@@ -541,6 +541,16 @@
     VHostNetState *net = get_vhost_net(nc);
     const VhostOps *vhost_ops = net->dev.vhost_ops;
 
+    /*
+     * vhost-vdpa network devices need to enable dataplane virtqueues after
+     * DRIVER_OK, so they can recover device state before starting dataplane.
+     * Because of that, we don't enable virtqueues here and leave it to
+     * net/vhost-vdpa.c.
+     */
+    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+        return 0;
+    }
+
     nc->vring_enable = enable;
 
     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 13b6991..96632fd 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events

@@ -49,7 +49,7 @@
 vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32
 vhost_vdpa_reset_device(void *dev) "dev: %p"
 vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d"
-vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d"
+vhost_vdpa_set_vring_enable_one(void *dev, unsigned i, int enable, int r) "dev: %p, idx: %u, enable: %d, r: %d"
 vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
 vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32

diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
index eb9ecea..13e87f0 100644
--- a/hw/virtio/vdpa-dev.c
+++ b/hw/virtio/vdpa-dev.c

@@ -253,14 +253,11 @@
 
     s->dev.acked_features = vdev->guest_features;
 
-    ret = vhost_dev_start(&s->dev, vdev, false);
+    ret = vhost_dev_start(&s->dev, vdev, true);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Error starting vhost");
         goto err_guest_notifiers;
     }
-    for (i = 0; i < s->dev.nvqs; ++i) {
-        vhost_vdpa_set_vring_ready(&s->vdpa, i);
-    }
     s->started = true;
 
     /*

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3bcd05c..e827b91 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c

@@ -896,19 +896,41 @@
     return idx;
 }
 
-int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
+static int vhost_vdpa_set_vring_enable_one(struct vhost_vdpa *v, unsigned idx,
+                                           int enable)
 {
     struct vhost_dev *dev = v->dev;
     struct vhost_vring_state state = {
         .index = idx,
-        .num = 1,
+        .num = enable,
     };
     int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
 
-    trace_vhost_vdpa_set_vring_ready(dev, idx, r);
+    trace_vhost_vdpa_set_vring_enable_one(dev, idx, enable, r);
     return r;
 }
 
+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    unsigned int i;
+    int ret;
+
+    for (i = 0; i < dev->nvqs; ++i) {
+        ret = vhost_vdpa_set_vring_enable_one(v, i, enable);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
+{
+    return vhost_vdpa_set_vring_enable_one(v, idx, 1);
+}
+
 static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                        int fd)
 {
@@ -1536,6 +1558,7 @@
         .vhost_set_features = vhost_vdpa_set_features,
         .vhost_reset_device = vhost_vdpa_reset_device,
         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
+        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
         .vhost_get_config  = vhost_vdpa_get_config,
         .vhost_set_config = vhost_vdpa_set_config,
         .vhost_requires_shm_log = NULL,

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 2e4e040..f50180e 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c

@@ -1984,7 +1984,13 @@
     return hdev->vhost_ops->vhost_set_vring_enable(hdev, enable);
 }
 
-/* Host notifiers must be enabled at this point. */
+/*
+ * Host notifiers must be enabled at this point.
+ *
+ * If @vrings is true, this function will enable all vrings before starting the
+ * device. If it is false, the vring initialization is left to be done by the
+ * caller.
+ */
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
 {
     int i, r;

diff --git a/tests/qemu-iotests/157 b/tests/qemu-iotests/157
index 0dc9ba6..aa2ebbf 100755
--- a/tests/qemu-iotests/157
+++ b/tests/qemu-iotests/157

@@ -40,6 +40,8 @@
 _supported_fmt generic
 _supported_proto file
 
+_require_devices virtio-blk
+
 do_run_qemu()
 {
     (

diff --git a/tests/qemu-iotests/227 b/tests/qemu-iotests/227
index 7e45a47..eddaad6 100755
--- a/tests/qemu-iotests/227
+++ b/tests/qemu-iotests/227

@@ -40,6 +40,8 @@
 _supported_fmt generic
 _supported_proto file
 
+_require_devices virtio-blk
+
 do_run_qemu()
 {
     echo Testing: "$@"

diff --git a/tests/qemu-iotests/tests/stream-unaligned-prefetch b/tests/qemu-iotests/tests/stream-unaligned-prefetch
new file mode 100755
index 0000000..546db1d
--- /dev/null
+++ b/tests/qemu-iotests/tests/stream-unaligned-prefetch

@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# group: rw quick
+#
+# Test what happens when a stream job does an unaligned prefetch read
+# which requires padding while having a NULL qiov.
+#
+# Copyright (C) Proxmox Server Solutions GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import imgfmt, qemu_img_create, qemu_io, QMPTestCase
+
+image_size = 1 * 1024 * 1024
+cluster_size = 64 * 1024
+base = os.path.join(iotests.test_dir, 'base.img')
+top = os.path.join(iotests.test_dir, 'top.img')
+
+class TestStreamUnalignedPrefetch(QMPTestCase):
+    def setUp(self) -> None:
+        """
+        Create two images:
+        - base image (base.img) with cluster_size // 2 bytes allocated
+        - top image (top.img) without any data allocated and coarser
+          cluster size
+
+        Attach a compress filter for the top image, because the filter
+        requires that the request alignment is the top image's cluster
+        size.
+        """
+        qemu_img_create('-f', imgfmt,
+                        '-o', 'cluster_size={}'.format(cluster_size // 2),
+                        base, str(image_size))
+        qemu_io('-c', f'write 0 {cluster_size // 2}', base)
+        qemu_img_create('-f', imgfmt,
+                        '-o', 'cluster_size={}'.format(cluster_size),
+                        top, str(image_size))
+
+        self.vm = iotests.VM()
+        self.vm.add_blockdev(self.vm.qmp_to_opts({
+            'driver': imgfmt,
+            'node-name': 'base',
+            'file': {
+                'driver': 'file',
+                'filename': base
+            }
+        }))
+        self.vm.add_blockdev(self.vm.qmp_to_opts({
+            'driver': 'compress',
+            'node-name': 'compress-top',
+            'file': {
+                'driver': imgfmt,
+                'node-name': 'top',
+                'file': {
+                    'driver': 'file',
+                    'filename': top
+                },
+                'backing': 'base'
+            }
+        }))
+        self.vm.launch()
+
+    def tearDown(self) -> None:
+        self.vm.shutdown()
+        os.remove(top)
+        os.remove(base)
+
+    def test_stream_unaligned_prefetch(self) -> None:
+        self.vm.cmd('block-stream', job_id='stream', device='compress-top')
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'], supported_protocols=['file'])

diff --git a/tests/qemu-iotests/tests/stream-unaligned-prefetch.out b/tests/qemu-iotests/tests/stream-unaligned-prefetch.out
new file mode 100644
index 0000000..ae1213e
--- /dev/null
+++ b/tests/qemu-iotests/tests/stream-unaligned-prefetch.out

@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
commit	5107022a616247216a7f7338bd7c62b4399d89eb	[log] [tgz]
author	Peter Maydell <peter.maydell@linaro.org>	Tue Mar 26 15:50:19 2024 +0000
committer	Peter Maydell <peter.maydell@linaro.org>	Tue Mar 26 15:50:19 2024 +0000
tree	58f6a9106e9cfa68f4c6f5d23d94b962ac041393
parent	096ae430a7b5a704af4cd94dca7200d6cb069991 [diff]
parent	12d7b3bbd3333cededd3b695501d8d247239d769 [diff]