Merge tag 'migration-20230412-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

Migration pull request for 8.0

Last patches:
- Peter Xu's preempt channel fixes, needed for backward compatibility
  with old machine types.
- Lukas' fix to get the compress capability working again.
- Fix RAM migration on s390x.  Go back to the old code; it shouldn't
  be needed, but since the new code fails on s390x, just revert.

Later, Juan.

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmQ3HgQACgkQ9IfvGFhy
# 1yPXGQ/+Pf6HepNUlIr7naYOcpRriXPQF+q1zqo74F9fy2vrGcwJOI6qmRTjsX4E
# 9KgXipOz7+b5wSemF7PDKcnBiwyt6UHCH+XXe0h4TpyuORbtABKRgtOhA1/sa84D
# HnKp0TwImpAO26tzPa7u49aau/EEVBKAzFVcyn4w56S9qiDWicOpd5kG0CJBIsMJ
# Mnvy5fXaqQRewnKiwFoJGWfyhzEToDO6Z/SkT5xYON94P+eiM2xMwXOC5WcGfmY7
# wFGDB+SuyEP8TTn7mV0mmnlFjYe4G07hVARHSDFX3ho4b6q5F+WzfW095G6QKiu9
# n3Pzr7IBGX3sgetPtYwOwGsE9JrfHMFzBRxQZZwq5GSmjk7+agkbXmV7RyV82EYs
# KYOhuNF91ca0qvCrGA/eGbbJqVrd7SR5FhS4SQ7oKd5n2au/ZHoKwAgm5lBdcvES
# 2TB0MBN1s0JPh6KMV8tPB2miZyqPRa++oA8qIX7Asoe1X4xVT1FwiDaFL8TO8i2A
# 7uBis3KLZqOHC6dAiXlCDtaADAWgQxjcdoS1l8jTF6MgBSe+zQhXG+pcIDuSiV9N
# WfDiUPY97iqPTvpzdz3Is+LbBax2uY5ZR05KSdmCBpIgfvSWMqXtwRydclt6G5h7
# ZiOcTwrgMpXdbhdsFZTqVWAJG2sTkj4TA+IezVpXzPeQNLZ+T8k=
# =kW3P
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 12 Apr 2023 22:09:24 BST
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* tag 'migration-20230412-pull-request' of https://gitlab.com/juan.quintela/qemu:
  migration: fix ram_state_pending_exact()
  migration/ram.c: Fix migration with compress enabled
  migration: Recover behavior of preempt channel creation for pre-7.2
  migration: Fix potential race on postcopy_qemufile_src
  io: tls: Inherit QIO_CHANNEL_FEATURE_SHUTDOWN on server side

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 45e3d24..cd13b8b 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -42,6 +42,7 @@
 GlobalProperty hw_compat_7_2[] = {
     { "e1000e", "migrate-timadj", "off" },
     { "virtio-mem", "x-early-migration", "false" },
+    { "migration", "x-preempt-pre-7-2", "true" },
 };
 const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
 
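
The hw_compat_7_2 entry above is what turns x-preempt-pre-7-2 on for
7.2-and-older machine types, while newer machines keep the property's
built-in default of false.  A rough standalone sketch of how such a
compat table drives a property value; the table mirrors the hunk above,
but CompatProperty and compat_lookup() are hypothetical stand-ins for
QEMU's GlobalProperty machinery:

    /* Minimal model (not QEMU code) of a machine-type compat table: each
     * entry forces a property value for a given driver when an older
     * machine type is selected. */
    #include <stdio.h>
    #include <string.h>

    typedef struct {
        const char *driver;
        const char *property;
        const char *value;
    } CompatProperty;

    static const CompatProperty hw_compat_7_2_model[] = {
        { "e1000e",     "migrate-timadj",    "off"   },
        { "virtio-mem", "x-early-migration", "false" },
        { "migration",  "x-preempt-pre-7-2", "true"  },  /* added above */
    };

    /* Return the forced value for driver/property, or NULL for the default. */
    static const char *compat_lookup(const char *driver, const char *property)
    {
        size_t n = sizeof(hw_compat_7_2_model) / sizeof(hw_compat_7_2_model[0]);

        for (size_t i = 0; i < n; i++) {
            if (!strcmp(hw_compat_7_2_model[i].driver, driver) &&
                !strcmp(hw_compat_7_2_model[i].property, property)) {
                return hw_compat_7_2_model[i].value;
            }
        }
        return NULL;
    }

    int main(void)
    {
        const char *v = compat_lookup("migration", "x-preempt-pre-7-2");

        printf("x-preempt-pre-7-2 on a 7.2 machine type: %s\n",
               v ? v : "default (false)");
        return 0;
    }
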
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 5a7a3d4..9805dd0 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -74,6 +74,9 @@
     ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS));
 
     ioc->master = master;
+    if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) {
+        qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SHUTDOWN);
+    }
     object_ref(OBJECT(master));
 
     ioc->session = qcrypto_tls_session_new(
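
This hunk lets the TLS channel inherit QIO_CHANNEL_FEATURE_SHUTDOWN from
the channel it wraps, so a shutdown request can interrupt a read blocked
on a TLS-wrapped channel, which the preempt channel cleanup below relies
on.  A minimal standalone sketch of the same feature-inheritance pattern;
Channel, channel_has_feature() and channel_set_feature() are hypothetical
stand-ins, not the real QIOChannel API:

    /* Sketch of the feature-flag inheritance pattern from the hunk above. */
    #include <stdbool.h>
    #include <stdio.h>

    enum { CHAN_FEATURE_SHUTDOWN = 1u << 0 };

    typedef struct Channel {
        unsigned features;        /* bitmask of supported features */
        struct Channel *master;   /* underlying transport, if any  */
    } Channel;

    static bool channel_has_feature(const Channel *c, unsigned f)
    {
        return c->features & f;
    }

    static void channel_set_feature(Channel *c, unsigned f)
    {
        c->features |= f;
    }

    /* Wrap a transport channel: copy the shutdown capability over so a
     * shutdown request on the wrapper can reach a blocked reader. */
    static void channel_tls_init(Channel *tls, Channel *master)
    {
        tls->master = master;
        if (channel_has_feature(master, CHAN_FEATURE_SHUTDOWN)) {
            channel_set_feature(tls, CHAN_FEATURE_SHUTDOWN);
        }
    }

    int main(void)
    {
        Channel sock = { .features = CHAN_FEATURE_SHUTDOWN };
        Channel tls = { 0 };

        channel_tls_init(&tls, &sock);
        printf("tls channel supports shutdown: %d\n",
               channel_has_feature(&tls, CHAN_FEATURE_SHUTDOWN));
        return 0;
    }
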
diff --git a/migration/migration.c b/migration/migration.c
index ae2025d..bda4789 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3464,8 +3464,12 @@
         qemu_savevm_state_complete_postcopy(s->to_dst_file);
         qemu_mutex_unlock_iothread();
 
-        /* Shutdown the postcopy fast path thread */
-        if (migrate_postcopy_preempt()) {
+        /*
+         * Shutdown the postcopy fast path thread.  This is only needed
+         * when dest QEMU binary is old (7.1/7.2).  QEMU 8.0+ doesn't need
+         * this.
+         */
+        if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
             postcopy_preempt_shutdown_file(s);
         }
 
@@ -4384,6 +4388,15 @@
         }
     }
 
+    /*
+     * This needs to be done before resuming a postcopy.  Note: for newer
+     * QEMUs we will delay the channel creation until postcopy_start(), to
+     * avoid disorder of channel creations.
+     */
+    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
+        postcopy_preempt_setup(s);
+    }
+
     if (resume) {
         /* Wakeup the main migration thread to do the recovery */
         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -4443,6 +4456,8 @@
                       decompress_error_check, true),
     DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                       clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
+    DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
+                     preempt_pre_7_2, false),
 
     /* Migration parameters */
     DEFINE_PROP_UINT8("x-compress-level", MigrationState,
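
Taken together, the migration.c hunks make both the end-of-migration EOS
and the early (setup-phase) channel creation depend on the new
x-preempt-pre-7-2 property, which defaults to false and is flipped to
true only by the hw_compat_7_2 table above.  A condensed model of those
two decisions, using hypothetical names rather than the real
MigrationState API:

    /* Condensed model of the two decisions gated on x-preempt-pre-7-2. */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        bool preempt_enabled;   /* postcopy-preempt capability is on    */
        bool preempt_pre_7_2;   /* true only for 7.2-and-older machines */
    } MigModel;

    typedef enum { CHAN_AT_SETUP, CHAN_AT_POSTCOPY_START } ChannelPhase;

    /* Old destinations expect the preempt channel before any postcopy
     * data; new ones create it at postcopy_start() so channel creation
     * stays ordered. */
    static ChannelPhase preempt_channel_phase(const MigModel *m)
    {
        return m->preempt_pre_7_2 ? CHAN_AT_SETUP : CHAN_AT_POSTCOPY_START;
    }

    /* Only old destinations rely on the source sending EOS to stop their
     * preempt thread; new ones shut the channel down themselves. */
    static bool src_sends_preempt_eos(const MigModel *m)
    {
        return m->preempt_enabled && m->preempt_pre_7_2;
    }

    int main(void)
    {
        MigModel old_machine = { .preempt_enabled = true,
                                 .preempt_pre_7_2 = true };

        printf("channel phase: %d, send EOS: %d\n",
               preempt_channel_phase(&old_machine),
               src_sends_preempt_eos(&old_machine));
        return 0;
    }
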
diff --git a/migration/migration.h b/migration/migration.h
index 2da2f8a..310ae89 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -65,6 +65,12 @@
     bool all_zero;
 } PostcopyTmpPage;
 
+typedef enum {
+    PREEMPT_THREAD_NONE = 0,
+    PREEMPT_THREAD_CREATED,
+    PREEMPT_THREAD_QUIT,
+} PreemptThreadStatus;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
     QEMUFile *from_src_file;
@@ -124,7 +130,12 @@
     QemuSemaphore postcopy_qemufile_dst_done;
     /* Postcopy priority thread is used to receive postcopy requested pages */
     QemuThread postcopy_prio_thread;
-    bool postcopy_prio_thread_created;
+    /*
+     * Only ever set by the main vm load thread, but can be read by the
+     * postcopy preempt thread.  "volatile" makes sure all reads will be
+     * up to date across cores.
+     */
+    volatile PreemptThreadStatus preempt_thread_status;
     /*
      * Used to sync between the ram load main thread and the fast ram load
      * thread.  It protects postcopy_qemufile_dst, which is the postcopy
@@ -364,6 +375,34 @@
      * do not trigger spurious decompression errors.
      */
     bool decompress_error_check;
+    /*
+     * This variable only affects behavior when postcopy preempt mode is
+     * enabled.
+     *
+     * When set:
+     *
+     * - postcopy preempt src QEMU instance will generate an EOS message at
+     *   the end of migration to shut the preempt channel on dest side.
+     *
+     * - postcopy preempt channel will be created at the setup phase on src
+     *   QEMU.
+     *
+     * When clear:
+     *
+     * - postcopy preempt src QEMU instance will _not_ generate an EOS
+     *   message at the end of migration; the dest QEMU will shut down the
+     *   channel itself.
+     *
+     * - postcopy preempt channel will be created at the switching phase
+     *   from precopy -> postcopy (to avoid a race condition of misordered
+     *   channel creation).
+     *
+     * NOTE: see message-id <ZBoShWArKDPpX/D7@work-vm> on the qemu-devel
+     * mailing list for more information on the possible race.  This value
+     * should normally be left untouched after it is set by the machine
+     * type (or left at the default).
+     */
+    bool preempt_pre_7_2;
 
     /*
      * This decides the size of guest memory chunk that will be used
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 41c0713..93f39f8 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -568,9 +568,14 @@
 {
     trace_postcopy_ram_incoming_cleanup_entry();
 
-    if (mis->postcopy_prio_thread_created) {
+    if (mis->preempt_thread_status == PREEMPT_THREAD_CREATED) {
+        /* Notify the fast load thread to quit */
+        mis->preempt_thread_status = PREEMPT_THREAD_QUIT;
+        if (mis->postcopy_qemufile_dst) {
+            qemu_file_shutdown(mis->postcopy_qemufile_dst);
+        }
         qemu_thread_join(&mis->postcopy_prio_thread);
-        mis->postcopy_prio_thread_created = false;
+        mis->preempt_thread_status = PREEMPT_THREAD_NONE;
     }
 
     if (mis->have_fault_thread) {
@@ -1203,7 +1208,7 @@
          */
         postcopy_thread_create(mis, &mis->postcopy_prio_thread, "fault-fast",
                                postcopy_preempt_thread, QEMU_THREAD_JOINABLE);
-        mis->postcopy_prio_thread_created = true;
+        mis->preempt_thread_status = PREEMPT_THREAD_CREATED;
     }
 
     trace_postcopy_ram_enable_notify();
@@ -1625,8 +1630,14 @@
         return 0;
     }
 
-    /* Kick off async task to establish preempt channel */
-    postcopy_preempt_setup(s);
+    /*
+     * Kick off async task to establish preempt channel.  Only do so with
+     * 8.0+ machines, because 7.1/7.2 require the channel to be created in
+     * setup phase of migration (even if racy in an unreliable network).
+     */
+    if (!s->preempt_pre_7_2) {
+        postcopy_preempt_setup(s);
+    }
 
     /*
      * We need the postcopy preempt channel to be established before
@@ -1652,6 +1663,11 @@
     trace_postcopy_pause_fast_load_continued();
 }
 
+static bool preempt_thread_should_run(MigrationIncomingState *mis)
+{
+    return mis->preempt_thread_status != PREEMPT_THREAD_QUIT;
+}
+
 void *postcopy_preempt_thread(void *opaque)
 {
     MigrationIncomingState *mis = opaque;
@@ -1671,11 +1687,11 @@
 
     /* Sending RAM_SAVE_FLAG_EOS to terminate this thread */
     qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
-    while (1) {
+    while (preempt_thread_should_run(mis)) {
         ret = ram_load_postcopy(mis->postcopy_qemufile_dst,
                                 RAM_CHANNEL_POSTCOPY);
         /* If error happened, go into recovery routine */
-        if (ret) {
+        if (ret && preempt_thread_should_run(mis)) {
             postcopy_pause_ram_fast_load(mis);
         } else {
             /* We're done */
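
The cleanup and thread-loop hunks above form a small quit protocol: the
main thread sets preempt_thread_status to PREEMPT_THREAD_QUIT, shuts
down postcopy_qemufile_dst to kick the reader out of a blocked read
(which is why the TLS channel must support QIO_CHANNEL_FEATURE_SHUTDOWN),
and only then joins; the worker in turn skips the pause/recovery path
when the error was caused by that shutdown.  A standalone pthreads sketch
of the same protocol, with hypothetical names and a pipe standing in for
the QEMUFile:

    /* Standalone sketch of the preempt thread quit protocol. */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    typedef enum {
        PREEMPT_NONE = 0,
        PREEMPT_CREATED,
        PREEMPT_QUIT,
    } PreemptStatus;

    /* As in the patch: written only by the main thread, read by the worker. */
    static volatile PreemptStatus status;
    static int chan[2];                /* stand-in for the preempt channel  */

    static int load_one_request(void)  /* blocks, like ram_load_postcopy()  */
    {
        char byte;

        return read(chan[0], &byte, 1) == 1 ? 0 : -1;
    }

    static void *preempt_thread(void *opaque)
    {
        (void)opaque;
        while (status != PREEMPT_QUIT) {
            int ret = load_one_request();

            if (ret && status != PREEMPT_QUIT) {
                /* Genuine error while still running: the real code pauses
                 * here and waits for migration recovery. */
                break;
            } else if (ret) {
                break;                 /* shutdown requested: exit quietly  */
            }
            /* ret == 0: one request handled, keep going */
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t tid;

        if (pipe(chan) != 0) {
            return 1;
        }
        status = PREEMPT_CREATED;
        pthread_create(&tid, NULL, preempt_thread, NULL);

        /* Cleanup path, as in postcopy_ram_incoming_cleanup(): request the
         * quit, unblock the reader, then join. */
        status = PREEMPT_QUIT;
        close(chan[1]);                /* like qemu_file_shutdown()         */
        pthread_join(tid, NULL);
        status = PREEMPT_NONE;
        puts("preempt thread joined cleanly");
        return 0;
    }
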
diff --git a/migration/ram.c b/migration/ram.c
index 96e8a19..79d881f 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -688,12 +688,11 @@
  * @offset: offset inside the block for the page
  *          in the lower bits, it contains flags
  */
-static size_t save_page_header(PageSearchStatus *pss, RAMBlock *block,
-                               ram_addr_t offset)
+static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f,
+                               RAMBlock *block, ram_addr_t offset)
 {
     size_t size, len;
     bool same_block = (block == pss->last_sent_block);
-    QEMUFile *f = pss->pss_channel;
 
     if (same_block) {
         offset |= RAM_SAVE_FLAG_CONTINUE;
@@ -867,7 +866,7 @@
     }
 
     /* Send XBZRLE based compressed page */
-    bytes_xbzrle = save_page_header(pss, block,
+    bytes_xbzrle = save_page_header(pss, pss->pss_channel, block,
                                     offset | RAM_SAVE_FLAG_XBZRLE);
     qemu_put_byte(file, ENCODING_FLAG_XBZRLE);
     qemu_put_be16(file, encoded_len);
@@ -1302,15 +1301,14 @@
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  */
-static int save_zero_page_to_file(PageSearchStatus *pss,
+static int save_zero_page_to_file(PageSearchStatus *pss, QEMUFile *file,
                                   RAMBlock *block, ram_addr_t offset)
 {
     uint8_t *p = block->host + offset;
-    QEMUFile *file = pss->pss_channel;
     int len = 0;
 
     if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
-        len += save_page_header(pss, block, offset | RAM_SAVE_FLAG_ZERO);
+        len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO);
         qemu_put_byte(file, 0);
         len += 1;
         ram_release_page(block->idstr, offset);
@@ -1327,10 +1325,10 @@
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  */
-static int save_zero_page(PageSearchStatus *pss, RAMBlock *block,
+static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block,
                           ram_addr_t offset)
 {
-    int len = save_zero_page_to_file(pss, block, offset);
+    int len = save_zero_page_to_file(pss, f, block, offset);
 
     if (len) {
         stat64_add(&ram_atomic_counters.duplicate, 1);
@@ -1394,7 +1392,7 @@
 {
     QEMUFile *file = pss->pss_channel;
 
-    ram_transferred_add(save_page_header(pss, block,
+    ram_transferred_add(save_page_header(pss, pss->pss_channel, block,
                                          offset | RAM_SAVE_FLAG_PAGE));
     if (async) {
         qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE,
@@ -1473,11 +1471,11 @@
     uint8_t *p = block->host + offset;
     int ret;
 
-    if (save_zero_page_to_file(pss, block, offset)) {
+    if (save_zero_page_to_file(pss, f, block, offset)) {
         return true;
     }
 
-    save_page_header(pss, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
+    save_page_header(pss, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
 
     /*
     * copy it to an internal buffer to avoid it being modified by VM
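
In the compress path above, the page header is now written to the
caller-supplied QEMUFile f instead of pss->pss_channel, keeping it on the
same stream as the data that follows it.  A toy model of why header and
payload must share one per-thread buffer; Buffer and the helpers are
hypothetical, not QEMU's QEMUFile API:

    /* Toy model: each compression thread fills its own buffer, which is
     * later flushed to the migration channel as one unit; writing the
     * header elsewhere would split header and payload. */
    #include <stdio.h>
    #include <string.h>

    typedef struct {
        char data[256];
        size_t len;
    } Buffer;

    static void buf_put(Buffer *b, const void *p, size_t n)
    {
        memcpy(b->data + b->len, p, n);
        b->len += n;
    }

    /* Like the fixed save_page_header(): write to the file the caller
     * hands in, not to some other channel. */
    static void put_page_header(Buffer *dest, unsigned long offset_and_flags)
    {
        buf_put(dest, &offset_and_flags, sizeof(offset_and_flags));
    }

    static void compress_one_page(Buffer *thread_buf, unsigned long offset,
                                  const void *page, size_t page_len)
    {
        put_page_header(thread_buf, offset);   /* header ...               */
        buf_put(thread_buf, page, page_len);   /* ... and payload together */
    }

    int main(void)
    {
        Buffer b = { .len = 0 };
        char page[64] = "page contents";

        compress_one_page(&b, 0x1000, page, sizeof(page));
        printf("buffered %zu bytes for one page\n", b.len);
        return 0;
    }
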
@@ -2355,7 +2353,7 @@
         return 1;
     }
 
-    res = save_zero_page(pss, block, offset);
+    res = save_zero_page(pss, pss->pss_channel, block, offset);
     if (res > 0) {
         /* Must let xbzrle know, otherwise a previous (now 0'd) cached
          * page would be stale
@@ -3508,12 +3506,13 @@
 static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy,
                                     uint64_t *can_postcopy)
 {
+    MigrationState *s = migrate_get_current();
     RAMState **temp = opaque;
     RAMState *rs = *temp;
 
     uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
 
-    if (!migration_in_postcopy()) {
+    if (!migration_in_postcopy() && remaining_size < s->threshold_size) {
         qemu_mutex_lock_iothread();
         WITH_RCU_READ_LOCK_GUARD() {
             migration_bitmap_sync_precopy(rs);
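
The final hunk restores the earlier ram_state_pending_exact() behaviour:
only take the iothread lock and do a synchronous dirty-bitmap sync when
the cheap estimate is already below threshold_size, i.e. when an exact
number could actually change the convergence decision.  A small sketch of
that shape of check, with hypothetical names and a fixed page size:

    /* Sketch of the restored "refine only when it matters" check. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define GUEST_PAGE_SIZE 4096u

    typedef struct {
        uint64_t dirty_pages;      /* cheap, possibly stale counter       */
        uint64_t threshold_bytes;  /* data allowed in the final downtime  */
    } RamModel;

    static uint64_t sync_dirty_bitmap(RamModel *rs)
    {
        /* stand-in for migration_bitmap_sync_precopy(): expensive, exact */
        return rs->dirty_pages * GUEST_PAGE_SIZE;
    }

    static uint64_t pending_exact(RamModel *rs, bool in_postcopy)
    {
        uint64_t remaining = rs->dirty_pages * GUEST_PAGE_SIZE;

        if (!in_postcopy && remaining < rs->threshold_bytes) {
            remaining = sync_dirty_bitmap(rs);  /* refine only when close */
        }
        return remaining;
    }

    int main(void)
    {
        RamModel rs = { .dirty_pages = 100, .threshold_bytes = 1u << 20 };

        printf("pending: %llu bytes\n",
               (unsigned long long)pending_exact(&rs, false));
        return 0;
    }
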