Merge tag 'pull-ppc-for-9.1-2-20240726-1' of https://gitlab.com/npiggin/qemu into staging

fixes

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCgAdFiEETkN92lZhb0MpsKeVZ7MCdqhiHK4FAmai5TsACgkQZ7MCdqhi
# HK4rgA//eh0ax3JnBGma1rVEDL5n5cdEYV+ATFYGc529CUZFUar3IMqSw3in8bJy
# uvQ6Cr/7IuusNEtoiYtdN1yNasqsm3fZB/hZ/Ekz32TsbpBRdkJW3ucavAu2rGM/
# EKRo7Y8gciy/Mj9y2JlIZqsDqYe+gribfGQvIg27DX+caAW/lKQdAdt4oJMTSdmr
# XR8JjtMdhUazKrI+bc/4EG6tIQyUdp+S1/z1q6Wthqt58dNRElTjkD9op4AsUWMu
# CE4a8ALCZoj3P3m+xf7xi7fT2JC2xgmNRCi3KbbhVEHdbFB6ViNYNuEYRS6GmpdC
# C6J/ZR6QXs6KB1KO7EyB+vsuxLX4Eb8aeCFxwMlzJ9Fo4g8JudABXOFzYTKX1xBn
# DUIGX91YACV43M2MvP/KuEU4zWpREO+U8MbQs/6s6fYsnCO2eKVJt/0Aaf1hmk37
# gY5Ak2DRx5TBvxlFy87zgHxHWTh/dGZodpN3IvCIDzVLnHGFlfluJbFRaoZSOecb
# 1vxDHORjIruLcAxNVEGkJ/6MxOrnjjoUzSPUQcbgJ5BpFZOdeGLiMAULu/HBLBd9
# 7dvVw+PeNEPJttYumljOD6nYc/jENhLQsvkc3++bwGNc/rpi4YngtB4jhT1HV2Cl
# oLool2ooKZgV4qx6IzeYo9feElvWVNK5XPzqDpSDlt9MaI+yTYM=
# =FxPm
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 26 Jul 2024 09:52:27 AM AEST
# gpg:                using RSA key 4E437DDA56616F4329B0A79567B30276A8621CAE
# gpg: Good signature from "Nicholas Piggin <npiggin@gmail.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 4E43 7DDA 5661 6F43 29B0  A795 67B3 0276 A862 1CAE

* tag 'pull-ppc-for-9.1-2-20240726-1' of https://gitlab.com/npiggin/qemu: (96 commits)
  target/ppc: Remove includes from mmu-book3s-v3.h
  target/ppc/mmu-radix64: Remove externally unused parts from header
  target/ppc: Unexport some functions from mmu-book3s-v3.h
  target/ppc/mmu-hash32.c: Move get_pteg_offset32() to the header
  target/ppc/mmu-hash32.c: Inline and remove ppc_hash32_pte_raddr()
  target/ppc/mmu_common.c: Remove mmu_ctx_t
  target/ppc/mmu_common.c: Stop using ctx in get_bat_6xx_tlb()
  target/ppc: Remove bat_size_prot()
  target/ppc/mmu_common.c: Use defines instead of numeric constants
  target/ppc/mmu_common.c: Rename function parameter
  target/ppc/mmu_common.c: Stop using ctx in ppc6xx_tlb_check()
  target/ppc/mmu_common.c: Remove key field from mmu_ctx_t
  target/ppc/mmu_common.c: Init variable in function that relies on it
  target/ppc/mmu-hash32.c: Inline and remove ppc_hash32_pte_prot()
  target/ppc: Add function to get protection key for hash32 MMU
  target/ppc/mmu_common.c: Remove ptem field from mmu_ctx_t
  target/ppc/mmu_common.c: Inline and remove ppc6xx_tlb_pte_check()
  target/ppc/mmu_common.c: Simplify a switch statement
  target/ppc/mmu_common.c: Remove single use local variable
  target/ppc/mmu_common.c: Convert local variable to bool
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 67b7736..e1d1386 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -398,6 +398,18 @@
     return 0;
 }
 
+int kvm_create_and_park_vcpu(CPUState *cpu)
+{
+    int ret = 0;
+
+    ret = kvm_create_vcpu(cpu);
+    if (!ret) {
+        kvm_park_vcpu(cpu);
+    }
+
+    return ret;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
diff --git a/cpu-common.c b/cpu-common.c
index 7ae136f..6b26223 100644
--- a/cpu-common.c
+++ b/cpu-common.c
@@ -57,14 +57,12 @@
     qemu_mutex_unlock(&qemu_cpu_list_lock);
 }
 
-static bool cpu_index_auto_assigned;
 
-static int cpu_get_free_index(void)
+int cpu_get_free_index(void)
 {
     CPUState *some_cpu;
     int max_cpu_index = 0;
 
-    cpu_index_auto_assigned = true;
     CPU_FOREACH(some_cpu) {
         if (some_cpu->cpu_index >= max_cpu_index) {
             max_cpu_index = some_cpu->cpu_index + 1;
@@ -83,8 +81,11 @@
 
 void cpu_list_add(CPUState *cpu)
 {
+    static bool cpu_index_auto_assigned;
+
     QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
     if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
+        cpu_index_auto_assigned = true;
         cpu->cpu_index = cpu_get_free_index();
         assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
     } else {
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 9e99107..0b94af3 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -357,6 +357,9 @@
       .sfdp_read = m25p80_sfdp_w25q512jv },
     { INFO("w25q01jvq",   0xef4021,      0,  64 << 10, 2048, ER_4K),
       .sfdp_read = m25p80_sfdp_w25q01jvq },
+
+    /* Microchip */
+    { INFO("25csm04",      0x29cc00,      0x100,  64 << 10,  8, 0) },
 };
 
 typedef enum {
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index 2fb4fa2..7860910 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -25,6 +25,7 @@
 #include "hw/ppc/ppc.h"
 #include "hw/qdev-properties.h"
 #include "sysemu/reset.h"
+#include "sysemu/qtest.h"
 
 #include <libfdt.h>
 
@@ -32,6 +33,16 @@
 
 #undef XIVE2_DEBUG
 
+/* XIVE Sync or Flush Notification Block */
+typedef struct XiveSfnBlock {
+    uint8_t bytes[32];
+} XiveSfnBlock;
+
+/* XIVE Thread Sync or Flush Notification Area */
+typedef struct XiveThreadNA {
+    XiveSfnBlock topo[16];
+} XiveThreadNA;
+
 /*
  * Virtual structures table (VST)
  */
@@ -45,16 +56,16 @@
 
 static const XiveVstInfo vst_infos[] = {
 
-    [VST_EAS]  = { "EAT",  sizeof(Xive2Eas),  16 },
-    [VST_ESB]  = { "ESB",  1,                  16 },
-    [VST_END]  = { "ENDT", sizeof(Xive2End),  16 },
+    [VST_EAS]  = { "EAT",  sizeof(Xive2Eas),     16 },
+    [VST_ESB]  = { "ESB",  1,                    16 },
+    [VST_END]  = { "ENDT", sizeof(Xive2End),     16 },
 
-    [VST_NVP]  = { "NVPT", sizeof(Xive2Nvp),  16 },
-    [VST_NVG]  = { "NVGT", sizeof(Xive2Nvgc), 16 },
-    [VST_NVC]  = { "NVCT", sizeof(Xive2Nvgc), 16 },
+    [VST_NVP]  = { "NVPT", sizeof(Xive2Nvp),     16 },
+    [VST_NVG]  = { "NVGT", sizeof(Xive2Nvgc),    16 },
+    [VST_NVC]  = { "NVCT", sizeof(Xive2Nvgc),    16 },
 
-    [VST_IC]  =  { "IC",   1 /* ? */         , 16 }, /* Topology # */
-    [VST_SYNC] = { "SYNC", 1 /* ? */         , 16 }, /* Topology # */
+    [VST_IC]  =  { "IC",   1, /* ? */            16 }, /* Topology # */
+    [VST_SYNC] = { "SYNC", sizeof(XiveThreadNA), 16 }, /* Topology # */
 
     /*
      * This table contains the backing store pages for the interrupt
@@ -206,6 +217,20 @@
     return pnv_xive2_vst_addr_direct(xive, type, vsd, (idx % vst_per_page));
 }
 
+static uint8_t pnv_xive2_nvc_table_compress_shift(PnvXive2 *xive)
+{
+    uint8_t shift = GETFIELD(PC_NXC_PROC_CONFIG_NVC_TABLE_COMPRESS,
+                             xive->pc_regs[PC_NXC_PROC_CONFIG >> 3]);
+    return shift > 8 ? 0 : shift;
+}
+
+static uint8_t pnv_xive2_nvg_table_compress_shift(PnvXive2 *xive)
+{
+    uint8_t shift = GETFIELD(PC_NXC_PROC_CONFIG_NVG_TABLE_COMPRESS,
+                             xive->pc_regs[PC_NXC_PROC_CONFIG >> 3]);
+    return shift > 8 ? 0 : shift;
+}
+
 static uint64_t pnv_xive2_vst_addr(PnvXive2 *xive, uint32_t type, uint8_t blk,
                                    uint32_t idx)
 {
@@ -219,6 +244,11 @@
     }
 
     vsd = xive->vsds[type][blk];
+    if (vsd == 0) {
+        xive2_error(xive, "VST: vsd == 0 block id %d for VST %s %d !?",
+                    blk, info->name, idx);
+        return 0;
+    }
 
     /* Remote VST access */
     if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
@@ -227,6 +257,12 @@
         return xive ? pnv_xive2_vst_addr(xive, type, blk, idx) : 0;
     }
 
+    if (type == VST_NVG) {
+        idx >>= pnv_xive2_nvg_table_compress_shift(xive);
+    } else if (type == VST_NVC) {
+        idx >>= pnv_xive2_nvc_table_compress_shift(xive);
+    }
+
     if (VSD_INDIRECT & vsd) {
         return pnv_xive2_vst_addr_indirect(xive, type, vsd, idx);
     }
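
The compress shift read from PC_NXC_PROC_CONFIG simply scales NVG/NVC indexes
down before the VSD lookup, with out-of-range shifts treated as no compression
(see the helpers above). A minimal sketch of that arithmetic, with
illustrative index values:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t compress_idx(uint32_t idx, uint8_t shift)
    {
        return idx >> (shift > 8 ? 0 : shift);  /* out-of-range -> no-op */
    }

    int main(void)
    {
        printf("0x%x\n", compress_idx(0x1230, 4));   /* 0x123 */
        printf("0x%x\n", compress_idx(0x1230, 15));  /* 0x1230, unchanged */
        return 0;
    }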
@@ -329,40 +365,115 @@
                               word_number);
 }
 
-static int pnv_xive2_end_update(PnvXive2 *xive)
+static inline int pnv_xive2_get_current_pir(PnvXive2 *xive)
 {
-    uint8_t  blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID,
-                           xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]);
-    uint32_t idx = GETFIELD(VC_ENDC_WATCH_INDEX,
-                           xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]);
-    int i;
+    if (!qtest_enabled()) {
+        PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+        return ppc_cpu_pir(cpu);
+    }
+    return 0;
+}
+
+/*
+ * After SW injects a Queue Sync or Cache Flush operation, HW will notify
+ * SW of the completion of the operation by writing a byte of all 1's (0xff)
+ * to a specific memory location.  The memory location is calculated by first
+ * looking up a base address in the SYNC VSD using the Topology ID of the
+ * originating thread as the "block" number.  This points to a 64k block of
+ * memory that is further divided into 128 512-byte chunks, indexed by the
+ * thread ID of the requesting thread.  Finally, each 512-byte chunk is
+ * divided into 16 32-byte chunks, indexed by the Topology ID of the
+ * targeted IC's chip.  The values below are the offsets into that 32-byte
+ * chunk of memory for each type of cache flush or queue sync operation.
+ */
+#define PNV_XIVE2_QUEUE_IPI              0x00
+#define PNV_XIVE2_QUEUE_HW               0x01
+#define PNV_XIVE2_QUEUE_NXC              0x02
+#define PNV_XIVE2_QUEUE_INT              0x03
+#define PNV_XIVE2_QUEUE_OS               0x04
+#define PNV_XIVE2_QUEUE_POOL             0x05
+#define PNV_XIVE2_QUEUE_HARD             0x06
+#define PNV_XIVE2_CACHE_ENDC             0x08
+#define PNV_XIVE2_CACHE_ESBC             0x09
+#define PNV_XIVE2_CACHE_EASC             0x0a
+#define PNV_XIVE2_QUEUE_NXC_LD_LCL_NCO   0x10
+#define PNV_XIVE2_QUEUE_NXC_LD_LCL_CO    0x11
+#define PNV_XIVE2_QUEUE_NXC_ST_LCL_NCI   0x12
+#define PNV_XIVE2_QUEUE_NXC_ST_LCL_CI    0x13
+#define PNV_XIVE2_QUEUE_NXC_ST_RMT_NCI   0x14
+#define PNV_XIVE2_QUEUE_NXC_ST_RMT_CI    0x15
+#define PNV_XIVE2_CACHE_NXC              0x18
+
+static int pnv_xive2_inject_notify(PnvXive2 *xive, int type)
+{
+    uint64_t addr;
+    int pir = pnv_xive2_get_current_pir(xive);
+    int thread_nr = PNV10_PIR2THREAD(pir);
+    int thread_topo_id = PNV10_PIR2CHIP(pir);
+    int ic_topo_id = xive->chip->chip_id;
+    uint64_t offset = ic_topo_id * sizeof(XiveSfnBlock);
+    uint8_t byte = 0xff;
+    MemTxResult result;
+
+    /* Retrieve the address of requesting thread's notification area */
+    addr = pnv_xive2_vst_addr(xive, VST_SYNC, thread_topo_id, thread_nr);
+
+    if (!addr) {
+        xive2_error(xive, "VST: no SYNC entry %x/%x !?",
+                    thread_topo_id, thread_nr);
+        return -1;
+    }
+
+    address_space_stb(&address_space_memory, addr + offset + type, byte,
+                      MEMTXATTRS_UNSPECIFIED, &result);
+    assert(result == MEMTX_OK);
+
+    return 0;
+}
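
To make the notification addressing described above concrete, here is a
standalone sketch of the combined address computation (the VSD lookup applies
the per-thread step, the byte store applies the per-IC offset), with an
illustrative sync_base standing in for the address found in the SYNC VSD and
made-up IDs:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SFN_BLOCK_SIZE   32   /* one XiveSfnBlock per IC topology ID */
    #define THREAD_NA_SIZE  512   /* sizeof(XiveThreadNA), 16 blocks */

    static uint64_t sync_notify_addr(uint64_t sync_base, unsigned thread_nr,
                                     unsigned ic_topo_id, unsigned type)
    {
        return sync_base + thread_nr * THREAD_NA_SIZE
                         + ic_topo_id * SFN_BLOCK_SIZE + type;
    }

    int main(void)
    {
        /* thread 5, IC on chip 1, ENDC cache flush (0x08) -> 0x20000a28 */
        printf("0x%" PRIx64 "\n",
               sync_notify_addr(0x20000000ull, 5, 1, 0x08));
        return 0;
    }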
+
+static int pnv_xive2_end_update(PnvXive2 *xive, uint8_t watch_engine)
+{
+    uint8_t  blk;
+    uint32_t idx;
+    int i, spec_reg, data_reg;
     uint64_t endc_watch[4];
 
+    assert(watch_engine < ARRAY_SIZE(endc_watch));
+
+    spec_reg = (VC_ENDC_WATCH0_SPEC + watch_engine * 0x40) >> 3;
+    data_reg = (VC_ENDC_WATCH0_DATA0 + watch_engine * 0x40) >> 3;
+    blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID, xive->vc_regs[spec_reg]);
+    idx = GETFIELD(VC_ENDC_WATCH_INDEX, xive->vc_regs[spec_reg]);
+
     for (i = 0; i < ARRAY_SIZE(endc_watch); i++) {
-        endc_watch[i] =
-            cpu_to_be64(xive->vc_regs[(VC_ENDC_WATCH0_DATA0 >> 3) + i]);
+        endc_watch[i] = cpu_to_be64(xive->vc_regs[data_reg + i]);
     }
 
     return pnv_xive2_vst_write(xive, VST_END, blk, idx, endc_watch,
                               XIVE_VST_WORD_ALL);
 }
 
-static void pnv_xive2_end_cache_load(PnvXive2 *xive)
+static void pnv_xive2_end_cache_load(PnvXive2 *xive, uint8_t watch_engine)
 {
-    uint8_t  blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID,
-                           xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]);
-    uint32_t idx = GETFIELD(VC_ENDC_WATCH_INDEX,
-                           xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]);
+    uint8_t  blk;
+    uint32_t idx;
     uint64_t endc_watch[4] = { 0 };
-    int i;
+    int i, spec_reg, data_reg;
+
+    assert(watch_engine < ARRAY_SIZE(endc_watch));
+
+    spec_reg = (VC_ENDC_WATCH0_SPEC + watch_engine * 0x40) >> 3;
+    data_reg = (VC_ENDC_WATCH0_DATA0 + watch_engine * 0x40) >> 3;
+    blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID, xive->vc_regs[spec_reg]);
+    idx = GETFIELD(VC_ENDC_WATCH_INDEX, xive->vc_regs[spec_reg]);
 
     if (pnv_xive2_vst_read(xive, VST_END, blk, idx, endc_watch)) {
         xive2_error(xive, "VST: no END entry %x/%x !?", blk, idx);
     }
 
     for (i = 0; i < ARRAY_SIZE(endc_watch); i++) {
-        xive->vc_regs[(VC_ENDC_WATCH0_DATA0 >> 3) + i] =
-            be64_to_cpu(endc_watch[i]);
+        xive->vc_regs[data_reg + i] = be64_to_cpu(endc_watch[i]);
     }
 }
 
@@ -379,40 +490,75 @@
                               word_number);
 }
 
-static int pnv_xive2_nvp_update(PnvXive2 *xive)
+static int pnv_xive2_nxc_to_table_type(uint8_t nxc_type, uint32_t *table_type)
 {
-    uint8_t  blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID,
-                            xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]);
-    uint32_t idx = GETFIELD(PC_NXC_WATCH_INDEX,
-                            xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]);
-    int i;
+    switch (nxc_type) {
+    case PC_NXC_WATCH_NXC_NVP:
+        *table_type = VST_NVP;
+        break;
+    case PC_NXC_WATCH_NXC_NVG:
+        *table_type = VST_NVG;
+        break;
+    case PC_NXC_WATCH_NXC_NVC:
+        *table_type = VST_NVC;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "XIVE: invalid table type for nxc operation\n");
+        return -1;
+    }
+    return 0;
+}
+
+static int pnv_xive2_nxc_update(PnvXive2 *xive, uint8_t watch_engine)
+{
+    uint8_t  blk, nxc_type;
+    uint32_t idx, table_type = -1;
+    int i, spec_reg, data_reg;
     uint64_t nxc_watch[4];
 
+    assert(watch_engine < ARRAY_SIZE(nxc_watch));
+
+    spec_reg = (PC_NXC_WATCH0_SPEC + watch_engine * 0x40) >> 3;
+    data_reg = (PC_NXC_WATCH0_DATA0 + watch_engine * 0x40) >> 3;
+    nxc_type = GETFIELD(PC_NXC_WATCH_NXC_TYPE, xive->pc_regs[spec_reg]);
+    blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, xive->pc_regs[spec_reg]);
+    idx = GETFIELD(PC_NXC_WATCH_INDEX, xive->pc_regs[spec_reg]);
+
+    assert(!pnv_xive2_nxc_to_table_type(nxc_type, &table_type));
+
     for (i = 0; i < ARRAY_SIZE(nxc_watch); i++) {
-        nxc_watch[i] =
-            cpu_to_be64(xive->pc_regs[(PC_NXC_WATCH0_DATA0 >> 3) + i]);
+        nxc_watch[i] = cpu_to_be64(xive->pc_regs[data_reg + i]);
     }
 
-    return pnv_xive2_vst_write(xive, VST_NVP, blk, idx, nxc_watch,
+    return pnv_xive2_vst_write(xive, table_type, blk, idx, nxc_watch,
                               XIVE_VST_WORD_ALL);
 }
 
-static void pnv_xive2_nvp_cache_load(PnvXive2 *xive)
+static void pnv_xive2_nxc_cache_load(PnvXive2 *xive, uint8_t watch_engine)
 {
-    uint8_t  blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID,
-                           xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]);
-    uint32_t idx = GETFIELD(PC_NXC_WATCH_INDEX,
-                           xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]);
+    uint8_t  blk, nxc_type;
+    uint32_t idx, table_type = -1;
     uint64_t nxc_watch[4] = { 0 };
-    int i;
+    int i, spec_reg, data_reg;
 
-    if (pnv_xive2_vst_read(xive, VST_NVP, blk, idx, nxc_watch)) {
-        xive2_error(xive, "VST: no NVP entry %x/%x !?", blk, idx);
+    assert(watch_engine < ARRAY_SIZE(nxc_watch));
+
+    spec_reg = (PC_NXC_WATCH0_SPEC + watch_engine * 0x40) >> 3;
+    data_reg = (PC_NXC_WATCH0_DATA0 + watch_engine * 0x40) >> 3;
+    nxc_type = GETFIELD(PC_NXC_WATCH_NXC_TYPE, xive->pc_regs[spec_reg]);
+    blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, xive->pc_regs[spec_reg]);
+    idx = GETFIELD(PC_NXC_WATCH_INDEX, xive->pc_regs[spec_reg]);
+
+    assert(!pnv_xive2_nxc_to_table_type(nxc_type, &table_type));
+
+    if (pnv_xive2_vst_read(xive, table_type, blk, idx, nxc_watch)) {
+        xive2_error(xive, "VST: no NXC entry %x/%x in %s table!?",
+                    blk, idx, vst_infos[table_type].name);
     }
 
     for (i = 0; i < ARRAY_SIZE(nxc_watch); i++) {
-        xive->pc_regs[(PC_NXC_WATCH0_DATA0 >> 3) + i] =
-            be64_to_cpu(nxc_watch[i]);
+        xive->pc_regs[data_reg + i] = be64_to_cpu(nxc_watch[i]);
     }
 }
 
@@ -581,6 +727,7 @@
     case CQ_TAR_NVPG:
     case CQ_TAR_ESB:
     case CQ_TAR_END:
+    case CQ_TAR_NVC:
         xive->tables[tsel][entry] = val;
         break;
     default:
@@ -641,6 +788,9 @@
          * entries provisioned by FW (such as skiboot) and resize the
          * ESB window accordingly.
          */
+        if (memory_region_is_mapped(&xsrc->esb_mmio)) {
+            memory_region_del_subregion(&xive->esb_mmio, &xsrc->esb_mmio);
+        }
         if (!(VSD_INDIRECT & vsd)) {
             memory_region_set_size(&xsrc->esb_mmio, vst_tsize * SBE_PER_BYTE
                                    * (1ull << xsrc->esb_shift));
@@ -656,6 +806,9 @@
         /*
          * Backing store pages for the END.
          */
+        if (memory_region_is_mapped(&end_xsrc->esb_mmio)) {
+            memory_region_del_subregion(&xive->end_mmio, &end_xsrc->esb_mmio);
+        }
         if (!(VSD_INDIRECT & vsd)) {
             memory_region_set_size(&end_xsrc->esb_mmio, (vst_tsize / info->size)
                                    * (1ull << end_xsrc->esb_shift));
@@ -680,13 +833,10 @@
  * Both PC and VC sub-engines are configured as each use the Virtual
  * Structure Tables
  */
-static void pnv_xive2_vst_set_data(PnvXive2 *xive, uint64_t vsd)
+static void pnv_xive2_vst_set_data(PnvXive2 *xive, uint64_t vsd,
+                                   uint8_t type, uint8_t blk)
 {
     uint8_t mode = GETFIELD(VSD_MODE, vsd);
-    uint8_t type = GETFIELD(VC_VSD_TABLE_SELECT,
-                            xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]);
-    uint8_t blk = GETFIELD(VC_VSD_TABLE_ADDRESS,
-                           xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]);
     uint64_t vst_addr = vsd & VSD_ADDRESS_MASK;
 
     if (type > VST_ERQ) {
@@ -721,6 +871,16 @@
     }
 }
 
+static void pnv_xive2_vc_vst_set_data(PnvXive2 *xive, uint64_t vsd)
+{
+    uint8_t type = GETFIELD(VC_VSD_TABLE_SELECT,
+                            xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]);
+    uint8_t blk = GETFIELD(VC_VSD_TABLE_ADDRESS,
+                           xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]);
+
+    pnv_xive2_vst_set_data(xive, vsd, type, blk);
+}
+
 /*
  * MMIO handlers
  */
@@ -964,12 +1124,70 @@
     },
 };
 
+static uint8_t pnv_xive2_cache_watch_assign(uint64_t engine_mask,
+                                            uint64_t *state)
+{
+    uint8_t val = 0xFF;
+    int i;
+
+    for (i = 3; i >= 0; i--) {
+        if (BIT(i) & engine_mask) {
+            if (!(BIT(i) & *state)) {
+                *state |= BIT(i);
+                val = 3 - i;
+                break;
+            }
+        }
+    }
+    return val;
+}
+
+static void pnv_xive2_cache_watch_release(uint64_t *state, uint8_t watch_engine)
+{
+    uint8_t engine_bit = 3 - watch_engine;
+
+    if (*state & BIT(engine_bit)) {
+        *state &= ~BIT(engine_bit);
+    }
+}
+
+static uint8_t pnv_xive2_endc_cache_watch_assign(PnvXive2 *xive)
+{
+    uint64_t engine_mask = GETFIELD(VC_ENDC_CFG_CACHE_WATCH_ASSIGN,
+                                    xive->vc_regs[VC_ENDC_CFG >> 3]);
+    uint64_t state = xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3];
+    uint8_t val;
+
+    /*
+     * We keep track of which engines are currently busy in the
+     * VC_ENDC_WATCH_ASSIGN register directly. When the firmware reads
+     * the register, we don't return its value but the ID of an engine
+     * it can use.
+     * There are 4 engines. 0xFF means no engine is available.
+     */
+    val = pnv_xive2_cache_watch_assign(engine_mask, &state);
+    if (val != 0xFF) {
+        xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3] = state;
+    }
+    return val;
+}
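
A standalone sketch of the assignment walk, using the reset-time mask 0b0111
that this patch later programs into VC_ENDC_CFG (bit 3 of the mask corresponds
to engine 0, so 0b0111 exposes engines 1 to 3):

    #include <stdint.h>
    #include <stdio.h>

    #define BIT(n) (1ull << (n))

    static uint8_t cache_watch_assign(uint64_t engine_mask, uint64_t *state)
    {
        for (int i = 3; i >= 0; i--) {
            if ((BIT(i) & engine_mask) && !(BIT(i) & *state)) {
                *state |= BIT(i);
                return 3 - i;  /* bit 3 <-> engine 0 ... bit 0 <-> engine 3 */
            }
        }
        return 0xFF;           /* all usable engines are busy */
    }

    int main(void)
    {
        uint64_t state = 0;

        for (int n = 0; n < 5; n++) {
            printf("%u ", (unsigned)cache_watch_assign(0x7, &state));
        }
        printf("\n");          /* prints: 1 2 3 255 255 */
        return 0;
    }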
+
+static void pnv_xive2_endc_cache_watch_release(PnvXive2 *xive,
+                                               uint8_t watch_engine)
+{
+    uint64_t state = xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3];
+
+    pnv_xive2_cache_watch_release(&state, watch_engine);
+    xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3] = state;
+}
+
 static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset,
                                      unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
     uint64_t val = 0;
     uint32_t reg = offset >> 3;
+    uint8_t watch_engine;
 
     switch (offset) {
     /*
@@ -1000,24 +1218,44 @@
         val = xive->vc_regs[reg];
         break;
 
+    case VC_ENDC_WATCH_ASSIGN:
+        val = pnv_xive2_endc_cache_watch_assign(xive);
+        break;
+
+    case VC_ENDC_CFG:
+        val = xive->vc_regs[reg];
+        break;
+
     /*
      * END cache updates
      */
     case VC_ENDC_WATCH0_SPEC:
+    case VC_ENDC_WATCH1_SPEC:
+    case VC_ENDC_WATCH2_SPEC:
+    case VC_ENDC_WATCH3_SPEC:
+        watch_engine = (offset - VC_ENDC_WATCH0_SPEC) >> 6;
         xive->vc_regs[reg] &= ~(VC_ENDC_WATCH_FULL | VC_ENDC_WATCH_CONFLICT);
+        pnv_xive2_endc_cache_watch_release(xive, watch_engine);
         val = xive->vc_regs[reg];
         break;
 
     case VC_ENDC_WATCH0_DATA0:
+    case VC_ENDC_WATCH1_DATA0:
+    case VC_ENDC_WATCH2_DATA0:
+    case VC_ENDC_WATCH3_DATA0:
         /*
          * Load DATA registers from cache with data requested by the
          * SPEC register
          */
-        pnv_xive2_end_cache_load(xive);
+        watch_engine = (offset - VC_ENDC_WATCH0_DATA0) >> 6;
+        pnv_xive2_end_cache_load(xive, watch_engine);
         val = xive->vc_regs[reg];
         break;
 
     case VC_ENDC_WATCH0_DATA1 ... VC_ENDC_WATCH0_DATA3:
+    case VC_ENDC_WATCH1_DATA1 ... VC_ENDC_WATCH1_DATA3:
+    case VC_ENDC_WATCH2_DATA1 ... VC_ENDC_WATCH2_DATA3:
+    case VC_ENDC_WATCH3_DATA1 ... VC_ENDC_WATCH3_DATA3:
         val = xive->vc_regs[reg];
         break;
 
@@ -1063,6 +1301,7 @@
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
     uint32_t reg = offset >> 3;
+    uint8_t watch_engine;
 
     switch (offset) {
     /*
@@ -1071,7 +1310,7 @@
     case VC_VSD_TABLE_ADDR:
        break;
     case VC_VSD_TABLE_DATA:
-        pnv_xive2_vst_set_data(xive, val);
+        pnv_xive2_vc_vst_set_data(xive, val);
         break;
 
     /*
@@ -1083,6 +1322,10 @@
         /* ESB update */
         break;
 
+    case VC_ESBC_FLUSH_INJECT:
+        pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_ESBC);
+        break;
+
     case VC_ESBC_CFG:
         break;
 
@@ -1095,19 +1338,36 @@
         /* EAS update */
         break;
 
+    case VC_EASC_FLUSH_INJECT:
+        pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_EASC);
+        break;
+
+    case VC_ENDC_CFG:
+        break;
+
     /*
      * END cache updates
      */
     case VC_ENDC_WATCH0_SPEC:
+    case VC_ENDC_WATCH1_SPEC:
+    case VC_ENDC_WATCH2_SPEC:
+    case VC_ENDC_WATCH3_SPEC:
          val &= ~VC_ENDC_WATCH_CONFLICT; /* HW will set this bit */
         break;
 
     case VC_ENDC_WATCH0_DATA1 ... VC_ENDC_WATCH0_DATA3:
+    case VC_ENDC_WATCH1_DATA1 ... VC_ENDC_WATCH1_DATA3:
+    case VC_ENDC_WATCH2_DATA1 ... VC_ENDC_WATCH2_DATA3:
+    case VC_ENDC_WATCH3_DATA1 ... VC_ENDC_WATCH3_DATA3:
         break;
     case VC_ENDC_WATCH0_DATA0:
+    case VC_ENDC_WATCH1_DATA0:
+    case VC_ENDC_WATCH2_DATA0:
+    case VC_ENDC_WATCH3_DATA0:
         /* writing to DATA0 triggers the cache write */
+        watch_engine = (offset - VC_ENDC_WATCH0_DATA0) >> 6;
         xive->vc_regs[reg] = val;
-        pnv_xive2_end_update(xive);
+        pnv_xive2_end_update(xive, watch_engine);
         break;
 
 
@@ -1116,6 +1376,10 @@
         xive->vc_regs[VC_ENDC_FLUSH_CTRL >> 3] |= VC_ENDC_FLUSH_CTRL_POLL_VALID;
         break;
 
+    case VC_ENDC_FLUSH_INJECT:
+        pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_ENDC);
+        break;
+
     /*
      * Indirect invalidation
      */
@@ -1157,12 +1421,43 @@
     },
 };
 
+static uint8_t pnv_xive2_nxc_cache_watch_assign(PnvXive2 *xive)
+{
+    uint64_t engine_mask = GETFIELD(PC_NXC_PROC_CONFIG_WATCH_ASSIGN,
+                                    xive->pc_regs[PC_NXC_PROC_CONFIG >> 3]);
+    uint64_t state = xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3];
+    uint8_t val;
+
+    /*
+     * We keep track of which engines are currently busy in the
+     * PC_NXC_WATCH_ASSIGN register directly. When the firmware reads
+     * the register, we don't return its value but the ID of an engine
+     * it can use.
+     * There are 4 engines. 0xFF means no engine is available.
+     */
+    val = pnv_xive2_cache_watch_assign(engine_mask, &state);
+    if (val != 0xFF) {
+        xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3] = state;
+    }
+    return val;
+}
+
+static void pnv_xive2_nxc_cache_watch_release(PnvXive2 *xive,
+                                              uint8_t watch_engine)
+{
+    uint64_t state = xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3];
+
+    pnv_xive2_cache_watch_release(&state, watch_engine);
+    xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3] = state;
+}
+
 static uint64_t pnv_xive2_ic_pc_read(void *opaque, hwaddr offset,
                                      unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
     uint64_t val = -1;
     uint32_t reg = offset >> 3;
+    uint8_t watch_engine;
 
     switch (offset) {
     /*
@@ -1173,24 +1468,44 @@
         val = xive->pc_regs[reg];
         break;
 
+    case PC_NXC_WATCH_ASSIGN:
+        val = pnv_xive2_nxc_cache_watch_assign(xive);
+        break;
+
+    case PC_NXC_PROC_CONFIG:
+        val = xive->pc_regs[reg];
+        break;
+
     /*
      * cache updates
      */
     case PC_NXC_WATCH0_SPEC:
+    case PC_NXC_WATCH1_SPEC:
+    case PC_NXC_WATCH2_SPEC:
+    case PC_NXC_WATCH3_SPEC:
+        watch_engine = (offset - PC_NXC_WATCH0_SPEC) >> 6;
         xive->pc_regs[reg] &= ~(PC_NXC_WATCH_FULL | PC_NXC_WATCH_CONFLICT);
+        pnv_xive2_nxc_cache_watch_release(xive, watch_engine);
         val = xive->pc_regs[reg];
         break;
 
     case PC_NXC_WATCH0_DATA0:
+    case PC_NXC_WATCH1_DATA0:
+    case PC_NXC_WATCH2_DATA0:
+    case PC_NXC_WATCH3_DATA0:
        /*
         * Load DATA registers from cache with data requested by the
         * SPEC register
         */
-        pnv_xive2_nvp_cache_load(xive);
+        watch_engine = (offset - PC_NXC_WATCH0_DATA0) >> 6;
+        pnv_xive2_nxc_cache_load(xive, watch_engine);
         val = xive->pc_regs[reg];
         break;
 
     case PC_NXC_WATCH0_DATA1 ... PC_NXC_WATCH0_DATA3:
+    case PC_NXC_WATCH1_DATA1 ... PC_NXC_WATCH1_DATA3:
+    case PC_NXC_WATCH2_DATA1 ... PC_NXC_WATCH2_DATA3:
+    case PC_NXC_WATCH3_DATA1 ... PC_NXC_WATCH3_DATA3:
         val = xive->pc_regs[reg];
         break;
 
@@ -1214,36 +1529,66 @@
     return val;
 }
 
+static void pnv_xive2_pc_vst_set_data(PnvXive2 *xive, uint64_t vsd)
+{
+    uint8_t type = GETFIELD(PC_VSD_TABLE_SELECT,
+                            xive->pc_regs[PC_VSD_TABLE_ADDR >> 3]);
+    uint8_t blk = GETFIELD(PC_VSD_TABLE_ADDRESS,
+                           xive->pc_regs[PC_VSD_TABLE_ADDR >> 3]);
+
+    pnv_xive2_vst_set_data(xive, vsd, type, blk);
+}
+
 static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset,
                                   uint64_t val, unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
     uint32_t reg = offset >> 3;
+    uint8_t watch_engine;
 
     switch (offset) {
 
     /*
-     * VSD table settings. Only taken into account in the VC
-     * sub-engine because the Xive2Router model combines both VC and PC
-     * sub-engines
+     * VSD table settings.
+     * The Xive2Router model combines both VC and PC sub-engines. We
+     * allow the tables to be configured through both, for the rare
+     * cases where a table only really needs to be configured for one
+     * of them (e.g. the NVG table for the presenter). This assumes
+     * that firmware passes the same address to the VC and PC when
+     * tables are defined for both, which seems acceptable.
      */
     case PC_VSD_TABLE_ADDR:
+        break;
     case PC_VSD_TABLE_DATA:
+        pnv_xive2_pc_vst_set_data(xive, val);
+        break;
+
+    case PC_NXC_PROC_CONFIG:
         break;
 
     /*
      * cache updates
      */
     case PC_NXC_WATCH0_SPEC:
+    case PC_NXC_WATCH1_SPEC:
+    case PC_NXC_WATCH2_SPEC:
+    case PC_NXC_WATCH3_SPEC:
         val &= ~PC_NXC_WATCH_CONFLICT; /* HW will set this bit */
         break;
 
     case PC_NXC_WATCH0_DATA1 ... PC_NXC_WATCH0_DATA3:
+    case PC_NXC_WATCH1_DATA1 ... PC_NXC_WATCH1_DATA3:
+    case PC_NXC_WATCH2_DATA1 ... PC_NXC_WATCH2_DATA3:
+    case PC_NXC_WATCH3_DATA1 ... PC_NXC_WATCH3_DATA3:
         break;
     case PC_NXC_WATCH0_DATA0:
+    case PC_NXC_WATCH1_DATA0:
+    case PC_NXC_WATCH2_DATA0:
+    case PC_NXC_WATCH3_DATA0:
         /* writing to DATA0 triggers the cache write */
+        watch_engine = (offset - PC_NXC_WATCH0_DATA0) >> 6;
         xive->pc_regs[reg] = val;
-        pnv_xive2_nvp_update(xive);
+        pnv_xive2_nxc_update(xive, watch_engine);
         break;
 
    /* case PC_NXC_FLUSH_CTRL: */
@@ -1251,6 +1596,10 @@
         xive->pc_regs[PC_NXC_FLUSH_CTRL >> 3] |= PC_NXC_FLUSH_CTRL_POLL_VALID;
         break;
 
+    case PC_NXC_FLUSH_INJECT:
+        pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_NXC);
+        break;
+
     /*
      * Indirect invalidation
      */
@@ -1547,13 +1896,19 @@
 /*
  * Sync MMIO page (write only)
  */
-#define PNV_XIVE2_SYNC_IPI      0x000
-#define PNV_XIVE2_SYNC_HW       0x080
-#define PNV_XIVE2_SYNC_NxC      0x100
-#define PNV_XIVE2_SYNC_INT      0x180
-#define PNV_XIVE2_SYNC_OS_ESC   0x200
-#define PNV_XIVE2_SYNC_POOL_ESC 0x280
-#define PNV_XIVE2_SYNC_HARD_ESC 0x300
+#define PNV_XIVE2_SYNC_IPI              0x000
+#define PNV_XIVE2_SYNC_HW               0x080
+#define PNV_XIVE2_SYNC_NxC              0x100
+#define PNV_XIVE2_SYNC_INT              0x180
+#define PNV_XIVE2_SYNC_OS_ESC           0x200
+#define PNV_XIVE2_SYNC_POOL_ESC         0x280
+#define PNV_XIVE2_SYNC_HARD_ESC         0x300
+#define PNV_XIVE2_SYNC_NXC_LD_LCL_NCO   0x800
+#define PNV_XIVE2_SYNC_NXC_LD_LCL_CO    0x880
+#define PNV_XIVE2_SYNC_NXC_ST_LCL_NCI   0x900
+#define PNV_XIVE2_SYNC_NXC_ST_LCL_CI    0x980
+#define PNV_XIVE2_SYNC_NXC_ST_RMT_NCI   0xA00
+#define PNV_XIVE2_SYNC_NXC_ST_RMT_CI    0xA80
 
 static uint64_t pnv_xive2_ic_sync_read(void *opaque, hwaddr offset,
                                        unsigned size)
@@ -1565,22 +1920,72 @@
     return -1;
 }
 
+/*
+ * The sync MMIO space spans two pages.  The lower page is used for
+ * queue sync "poll" requests while the upper page is used for queue
+ * sync "inject" requests.  Inject requests require the HW to write
+ * a byte of all 1's to a predetermined location in memory in order
+ * to signal completion of the request.  Both pages have the same
+ * layout, so it is easiest to handle both with a single function.
+ */
 static void pnv_xive2_ic_sync_write(void *opaque, hwaddr offset,
                                     uint64_t val, unsigned size)
 {
     PnvXive2 *xive = PNV_XIVE2(opaque);
+    int inject_type;
+    hwaddr pg_offset_mask = (1ull << xive->ic_shift) - 1;
 
-    switch (offset) {
+    /* adjust offset for inject page */
+    hwaddr adj_offset = offset & pg_offset_mask;
+
+    switch (adj_offset) {
     case PNV_XIVE2_SYNC_IPI:
+        inject_type = PNV_XIVE2_QUEUE_IPI;
+        break;
     case PNV_XIVE2_SYNC_HW:
+        inject_type = PNV_XIVE2_QUEUE_HW;
+        break;
     case PNV_XIVE2_SYNC_NxC:
+        inject_type = PNV_XIVE2_QUEUE_NXC;
+        break;
     case PNV_XIVE2_SYNC_INT:
+        inject_type = PNV_XIVE2_QUEUE_INT;
+        break;
     case PNV_XIVE2_SYNC_OS_ESC:
+        inject_type = PNV_XIVE2_QUEUE_OS;
+        break;
     case PNV_XIVE2_SYNC_POOL_ESC:
+        inject_type = PNV_XIVE2_QUEUE_POOL;
+        break;
     case PNV_XIVE2_SYNC_HARD_ESC:
+        inject_type = PNV_XIVE2_QUEUE_HARD;
+        break;
+    case PNV_XIVE2_SYNC_NXC_LD_LCL_NCO:
+        inject_type = PNV_XIVE2_QUEUE_NXC_LD_LCL_NCO;
+        break;
+    case PNV_XIVE2_SYNC_NXC_LD_LCL_CO:
+        inject_type = PNV_XIVE2_QUEUE_NXC_LD_LCL_CO;
+        break;
+    case PNV_XIVE2_SYNC_NXC_ST_LCL_NCI:
+        inject_type = PNV_XIVE2_QUEUE_NXC_ST_LCL_NCI;
+        break;
+    case PNV_XIVE2_SYNC_NXC_ST_LCL_CI:
+        inject_type = PNV_XIVE2_QUEUE_NXC_ST_LCL_CI;
+        break;
+    case PNV_XIVE2_SYNC_NXC_ST_RMT_NCI:
+        inject_type = PNV_XIVE2_QUEUE_NXC_ST_RMT_NCI;
+        break;
+    case PNV_XIVE2_SYNC_NXC_ST_RMT_CI:
+        inject_type = PNV_XIVE2_QUEUE_NXC_ST_RMT_CI;
         break;
     default:
         xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx, offset);
+        return;
+    }
+
+    /* Write Queue Sync notification byte if writing to sync inject page */
+    if ((offset & ~pg_offset_mask) != 0) {
+        pnv_xive2_inject_notify(xive, inject_type);
     }
 }
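
A quick worked decode, assuming the default 64k IC page size (ic_shift = 16)
set at reset: a write at offset 0x10880 masks down to 0x880, i.e.
PNV_XIVE2_SYNC_NXC_LD_LCL_CO, and since a bit above the page mask is set it
falls in the inject page, so the notification byte is written:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned ic_shift = 16;                          /* 64k pages */
        uint64_t pg_offset_mask = (1ull << ic_shift) - 1;
        uint64_t offset = 0x10880;                       /* inject page */

        printf("adj_offset=0x%llx inject=%d\n",
               (unsigned long long)(offset & pg_offset_mask),
               (offset & ~pg_offset_mask) != 0);
        /* -> adj_offset=0x880 inject=1 */
        return 0;
    }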
 
@@ -1814,6 +2219,12 @@
     xive->cq_regs[CQ_XIVE_CFG >> 3] |=
         SETFIELD(CQ_XIVE_CFG_HYP_HARD_BLOCK_ID, 0ull, xive->chip->chip_id);
 
+    /* VC and PC cache watch assign mechanism */
+    xive->vc_regs[VC_ENDC_CFG >> 3] =
+        SETFIELD(VC_ENDC_CFG_CACHE_WATCH_ASSIGN, 0ull, 0b0111);
+    xive->pc_regs[PC_NXC_PROC_CONFIG >> 3] =
+        SETFIELD(PC_NXC_PROC_CONFIG_WATCH_ASSIGN, 0ull, 0b0111);
+
     /* Set default page size to 64k */
     xive->ic_shift = xive->esb_shift = xive->end_shift = 16;
     xive->nvc_shift = xive->nvpg_shift = xive->tm_shift = 16;
@@ -2025,33 +2436,6 @@
 
 type_init(pnv_xive2_register_types)
 
-static void xive2_nvp_pic_print_info(Xive2Nvp *nvp, uint32_t nvp_idx,
-                                     GString *buf)
-{
-    uint8_t  eq_blk = xive_get_field32(NVP2_W5_VP_END_BLOCK, nvp->w5);
-    uint32_t eq_idx = xive_get_field32(NVP2_W5_VP_END_INDEX, nvp->w5);
-
-    if (!xive2_nvp_is_valid(nvp)) {
-        return;
-    }
-
-    g_string_append_printf(buf, "  %08x end:%02x/%04x IPB:%02x",
-                           nvp_idx, eq_blk, eq_idx,
-                           xive_get_field32(NVP2_W2_IPB, nvp->w2));
-    /*
-     * When the NVP is HW controlled, more fields are updated
-     */
-    if (xive2_nvp_is_hw(nvp)) {
-        g_string_append_printf(buf, " CPPR:%02x",
-                               xive_get_field32(NVP2_W2_CPPR, nvp->w2));
-        if (xive2_nvp_is_co(nvp)) {
-            g_string_append_printf(buf, " CO:%04x",
-                                   xive_get_field32(NVP2_W1_CO_THRID, nvp->w1));
-        }
-    }
-    g_string_append_c(buf, '\n');
-}
-
 /*
  * If the table is direct, we can compute the number of PQ entries
  * provisioned by FW.
diff --git a/hw/intc/pnv_xive2_regs.h b/hw/intc/pnv_xive2_regs.h
index 7165dc8..e8b87b3 100644
--- a/hw/intc/pnv_xive2_regs.h
+++ b/hw/intc/pnv_xive2_regs.h
@@ -232,6 +232,10 @@
 #define  VC_ESBC_FLUSH_POLL_BLOCK_ID_MASK       PPC_BITMASK(32, 35)
 #define  VC_ESBC_FLUSH_POLL_OFFSET_MASK         PPC_BITMASK(36, 63) /* 28-bit */
 
+/* ESBC cache flush inject register */
+#define X_VC_ESBC_FLUSH_INJECT                  0x142
+#define VC_ESBC_FLUSH_INJECT                    0x210
+
 /* ESBC configuration */
 #define X_VC_ESBC_CFG                           0x148
 #define VC_ESBC_CFG                             0x240
@@ -250,6 +254,10 @@
 #define  VC_EASC_FLUSH_POLL_BLOCK_ID_MASK       PPC_BITMASK(32, 35)
 #define  VC_EASC_FLUSH_POLL_OFFSET_MASK         PPC_BITMASK(36, 63) /* 28-bit */
 
+/* EASC flush inject register */
+#define X_VC_EASC_FLUSH_INJECT                  0x162
+#define VC_EASC_FLUSH_INJECT                    0x310
+
 /*
  * VC2
  */
@@ -270,6 +278,10 @@
 #define  VC_ENDC_FLUSH_POLL_BLOCK_ID_MASK       PPC_BITMASK(36, 39)
 #define  VC_ENDC_FLUSH_POLL_OFFSET_MASK         PPC_BITMASK(40, 63) /* 24-bit */
 
+/* ENDC flush inject register */
+#define X_VC_ENDC_FLUSH_INJECT                  0x182
+#define VC_ENDC_FLUSH_INJECT                    0x410
+
 /* ENDC Sync done */
 #define X_VC_ENDC_SYNC_DONE                     0x184
 #define VC_ENDC_SYNC_DONE                       0x420
@@ -283,6 +295,15 @@
 #define   VC_ENDC_SYNC_QUEUE_HARD               PPC_BIT(6)
 #define   VC_QUEUE_COUNT                        7
 
+/* ENDC cache watch assign */
+#define X_VC_ENDC_WATCH_ASSIGN                  0x186
+#define VC_ENDC_WATCH_ASSIGN                    0x430
+
+/* ENDC configuration register */
+#define X_VC_ENDC_CFG                           0x188
+#define VC_ENDC_CFG                             0x440
+#define   VC_ENDC_CFG_CACHE_WATCH_ASSIGN        PPC_BITMASK(32, 35)
+
 /* ENDC cache watch specification 0  */
 #define X_VC_ENDC_WATCH0_SPEC                   0x1A0
 #define VC_ENDC_WATCH0_SPEC                     0x500
@@ -302,6 +323,42 @@
 #define VC_ENDC_WATCH0_DATA2                    0x530
 #define VC_ENDC_WATCH0_DATA3                    0x538
 
+/* ENDC cache watch 1  */
+#define X_VC_ENDC_WATCH1_SPEC                   0x1A8
+#define VC_ENDC_WATCH1_SPEC                     0x540
+#define X_VC_ENDC_WATCH1_DATA0                  0x1AC
+#define X_VC_ENDC_WATCH1_DATA1                  0x1AD
+#define X_VC_ENDC_WATCH1_DATA2                  0x1AE
+#define X_VC_ENDC_WATCH1_DATA3                  0x1AF
+#define VC_ENDC_WATCH1_DATA0                    0x560
+#define VC_ENDC_WATCH1_DATA1                    0x568
+#define VC_ENDC_WATCH1_DATA2                    0x570
+#define VC_ENDC_WATCH1_DATA3                    0x578
+
+/* ENDC cache watch 2  */
+#define X_VC_ENDC_WATCH2_SPEC                   0x1B0
+#define VC_ENDC_WATCH2_SPEC                     0x580
+#define X_VC_ENDC_WATCH2_DATA0                  0x1B4
+#define X_VC_ENDC_WATCH2_DATA1                  0x1B5
+#define X_VC_ENDC_WATCH2_DATA2                  0x1B6
+#define X_VC_ENDC_WATCH2_DATA3                  0x1B7
+#define VC_ENDC_WATCH2_DATA0                    0x5A0
+#define VC_ENDC_WATCH2_DATA1                    0x5A8
+#define VC_ENDC_WATCH2_DATA2                    0x5B0
+#define VC_ENDC_WATCH2_DATA3                    0x5B8
+
+/* ENDC cache watch 3  */
+#define X_VC_ENDC_WATCH3_SPEC                   0x1B8
+#define VC_ENDC_WATCH3_SPEC                     0x5C0
+#define X_VC_ENDC_WATCH3_DATA0                  0x1BC
+#define X_VC_ENDC_WATCH3_DATA1                  0x1BD
+#define X_VC_ENDC_WATCH3_DATA2                  0x1BE
+#define X_VC_ENDC_WATCH3_DATA3                  0x1BF
+#define VC_ENDC_WATCH3_DATA0                    0x5E0
+#define VC_ENDC_WATCH3_DATA1                    0x5E8
+#define VC_ENDC_WATCH3_DATA2                    0x5F0
+#define VC_ENDC_WATCH3_DATA3                    0x5F8
+
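
The watch-engine register banks repeat every 0x40 bytes (SPEC at 0x500, 0x540,
0x580, 0x5C0), which is what lets the MMIO handlers in pnv_xive2.c recover the
engine number with a single shift. A small check of that stride, assuming
WATCH0_DATA0 sits at 0x520 per the existing definitions:

    #include <stdio.h>

    int main(void)
    {
        unsigned w0_data0 = 0x520;  /* VC_ENDC_WATCH0_DATA0 (assumed) */
        unsigned w2_data0 = 0x5A0;  /* VC_ENDC_WATCH2_DATA0 */

        printf("engine %u\n", (w2_data0 - w0_data0) >> 6);  /* engine 2 */
        return 0;
    }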
 /*
  * PC LSB1
  */
@@ -358,6 +415,21 @@
 #define  PC_NXC_FLUSH_POLL_BLOCK_ID_MASK        PPC_BITMASK(36, 39)
 #define  PC_NXC_FLUSH_POLL_OFFSET_MASK          PPC_BITMASK(40, 63) /* 24-bit */
 
+/* NxC Cache flush inject */
+#define X_PC_NXC_FLUSH_INJECT                   0x282
+#define PC_NXC_FLUSH_INJECT                     0x410
+
+/* NxC Cache watch assign */
+#define X_PC_NXC_WATCH_ASSIGN                   0x286
+#define PC_NXC_WATCH_ASSIGN                     0x430
+
+/* NxC Proc config */
+#define X_PC_NXC_PROC_CONFIG                    0x28A
+#define PC_NXC_PROC_CONFIG                      0x450
+#define   PC_NXC_PROC_CONFIG_WATCH_ASSIGN       PPC_BITMASK(0, 3)
+#define   PC_NXC_PROC_CONFIG_NVG_TABLE_COMPRESS PPC_BITMASK(32, 35)
+#define   PC_NXC_PROC_CONFIG_NVC_TABLE_COMPRESS PPC_BITMASK(36, 39)
+
 /* NxC Cache Watch 0 Specification */
 #define X_PC_NXC_WATCH0_SPEC                    0x2A0
 #define PC_NXC_WATCH0_SPEC                      0x500
@@ -381,6 +453,42 @@
 #define PC_NXC_WATCH0_DATA2                     0x530
 #define PC_NXC_WATCH0_DATA3                     0x538
 
+/* NxC Cache Watch 1 */
+#define X_PC_NXC_WATCH1_SPEC                    0x2A8
+#define PC_NXC_WATCH1_SPEC                      0x540
+#define X_PC_NXC_WATCH1_DATA0                   0x2AC
+#define X_PC_NXC_WATCH1_DATA1                   0x2AD
+#define X_PC_NXC_WATCH1_DATA2                   0x2AE
+#define X_PC_NXC_WATCH1_DATA3                   0x2AF
+#define PC_NXC_WATCH1_DATA0                     0x560
+#define PC_NXC_WATCH1_DATA1                     0x568
+#define PC_NXC_WATCH1_DATA2                     0x570
+#define PC_NXC_WATCH1_DATA3                     0x578
+
+/* NxC Cache Watch 2 */
+#define X_PC_NXC_WATCH2_SPEC                    0x2B0
+#define PC_NXC_WATCH2_SPEC                      0x580
+#define X_PC_NXC_WATCH2_DATA0                   0x2B4
+#define X_PC_NXC_WATCH2_DATA1                   0x2B5
+#define X_PC_NXC_WATCH2_DATA2                   0x2B6
+#define X_PC_NXC_WATCH2_DATA3                   0x2B7
+#define PC_NXC_WATCH2_DATA0                     0x5A0
+#define PC_NXC_WATCH2_DATA1                     0x5A8
+#define PC_NXC_WATCH2_DATA2                     0x5B0
+#define PC_NXC_WATCH2_DATA3                     0x5B8
+
+/* NxC Cache Watch 3 */
+#define X_PC_NXC_WATCH3_SPEC                    0x2B8
+#define PC_NXC_WATCH3_SPEC                      0x5C0
+#define X_PC_NXC_WATCH3_DATA0                   0x2BC
+#define X_PC_NXC_WATCH3_DATA1                   0x2BD
+#define X_PC_NXC_WATCH3_DATA2                   0x2BE
+#define X_PC_NXC_WATCH3_DATA3                   0x2BF
+#define PC_NXC_WATCH3_DATA0                     0x5E0
+#define PC_NXC_WATCH3_DATA1                     0x5E8
+#define PC_NXC_WATCH3_DATA2                     0x5F0
+#define PC_NXC_WATCH3_DATA3                     0x5F8
+
 /*
  * TCTXT Registers
  */
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 70f11f9..5a02dd8 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -692,9 +692,15 @@
         }
     }
 
-    g_string_append_printf(buf, "CPU[%04x]:   "
-                           "QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR  W2\n",
-                           cpu_index);
+    if (xive_presenter_get_config(tctx->xptr) & XIVE_PRESENTER_GEN1_TIMA_OS) {
+        g_string_append_printf(buf, "CPU[%04x]:   "
+                               "QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
+                               "  W2\n", cpu_index);
+    } else {
+        g_string_append_printf(buf, "CPU[%04x]:   "
+                               "QW   NSR CPPR IPB LSMFB   -  LGS  T  PIPR"
+                               "  W2\n", cpu_index);
+    }
 
     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
index 3e7238c..1f15068 100644
--- a/hw/intc/xive2.c
+++ b/hw/intc/xive2.c
@@ -89,7 +89,7 @@
     pq = xive_get_field32(END2_W1_ESn, end->w1);
 
     g_string_append_printf(buf,
-                           "  %08x %c%c %c%c%c%c%c%c%c%c%c%c "
+                           "  %08x %c%c %c%c%c%c%c%c%c%c%c%c%c %c%c "
                            "prio:%d nvp:%02x/%04x",
                            end_idx,
                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
@@ -98,12 +98,15 @@
                            xive2_end_is_enqueue(end)  ? 'q' : '-',
                            xive2_end_is_notify(end)   ? 'n' : '-',
                            xive2_end_is_backlog(end)  ? 'b' : '-',
+                           xive2_end_is_precluded_escalation(end) ? 'p' : '-',
                            xive2_end_is_escalate(end) ? 'e' : '-',
                            xive2_end_is_escalate_end(end) ? 'N' : '-',
                            xive2_end_is_uncond_escalation(end)   ? 'u' : '-',
                            xive2_end_is_silent_escalation(end)   ? 's' : '-',
                            xive2_end_is_firmware1(end)   ? 'f' : '-',
                            xive2_end_is_firmware2(end)   ? 'F' : '-',
+                           xive2_end_is_ignore(end) ? 'i' : '-',
+                           xive2_end_is_crowd(end)  ? 'c' : '-',
                            priority, nvp_blk, nvp_idx);
 
     if (qaddr_base) {
@@ -137,6 +140,32 @@
                            (uint32_t) xive_get_field64(EAS2_END_DATA, eas->w));
 }
 
+void xive2_nvp_pic_print_info(Xive2Nvp *nvp, uint32_t nvp_idx, GString *buf)
+{
+    uint8_t  eq_blk = xive_get_field32(NVP2_W5_VP_END_BLOCK, nvp->w5);
+    uint32_t eq_idx = xive_get_field32(NVP2_W5_VP_END_INDEX, nvp->w5);
+
+    if (!xive2_nvp_is_valid(nvp)) {
+        return;
+    }
+
+    g_string_append_printf(buf, "  %08x end:%02x/%04x IPB:%02x",
+                           nvp_idx, eq_blk, eq_idx,
+                           xive_get_field32(NVP2_W2_IPB, nvp->w2));
+    /*
+     * When the NVP is HW controlled, more fields are updated
+     */
+    if (xive2_nvp_is_hw(nvp)) {
+        g_string_append_printf(buf, " CPPR:%02x",
+                               xive_get_field32(NVP2_W2_CPPR, nvp->w2));
+        if (xive2_nvp_is_co(nvp)) {
+            g_string_append_printf(buf, " CO:%04x",
+                                   xive_get_field32(NVP2_W1_CO_THRID, nvp->w1));
+        }
+    }
+    g_string_append_c(buf, '\n');
+}
+
 static void xive2_end_enqueue(Xive2End *end, uint32_t data)
 {
     uint64_t qaddr_base = xive2_end_qaddr(end);
@@ -650,7 +679,7 @@
     }
 
     found = xive_presenter_notify(xrtr->xfb, format, nvp_blk, nvp_idx,
-                          xive_get_field32(END2_W6_IGNORE, end.w7),
+                          xive2_end_is_ignore(&end),
                           priority,
                           xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7));
 
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index 347212f..c235519 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -39,6 +39,9 @@
     select PCI_POWERNV
     select PCA9552
     select PCA9554
+    select SSI
+    select SSI_M25P80
+    select PNV_SPI
 
 config PPC405
     bool
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 3ebbf32..7cd9189 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -42,6 +42,7 @@
 ppc_ss.add(when: 'CONFIG_POWERNV', if_true: files(
   'pnv.c',
   'pnv_xscom.c',
+  'pnv_adu.c',
   'pnv_core.c',
   'pnv_i2c.c',
   'pnv_lpc.c',
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 6b41d1d..3526852 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -141,9 +141,9 @@
     CPUPPCState *env = &cpu->env;
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
     PnvChipClass *pnv_cc = PNV_CHIP_GET_CLASS(chip);
-    g_autofree uint32_t *servers_prop = g_new(uint32_t, smt_threads);
+    uint32_t *servers_prop;
     int i;
-    uint32_t pir;
+    uint32_t pir, tir;
     uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                        0xffffffff, 0xffffffff};
     uint32_t tbfreq = PNV_TIMEBASE_FREQ;
@@ -154,7 +154,10 @@
     char *nodename;
     int cpus_offset = get_cpus_node(fdt);
 
-    pir = pnv_cc->chip_pir(chip, pc->hwid, 0);
+    pnv_cc->get_pir_tir(chip, pc->hwid, 0, &pir, &tir);
+
+    /* Only one DT node per (big) core */
+    g_assert(tir == 0);
 
     nodename = g_strdup_printf("%s@%x", dc->fw_name, pir);
     offset = fdt_add_subnode(fdt, cpus_offset, nodename);
@@ -235,11 +238,28 @@
     }
 
     /* Build interrupt servers properties */
-    for (i = 0; i < smt_threads; i++) {
-        servers_prop[i] = cpu_to_be32(pnv_cc->chip_pir(chip, pc->hwid, i));
+    if (pc->big_core) {
+        servers_prop = g_new(uint32_t, smt_threads * 2);
+        for (i = 0; i < smt_threads; i++) {
+            pnv_cc->get_pir_tir(chip, pc->hwid, i, &pir, NULL);
+            servers_prop[i * 2] = cpu_to_be32(pir);
+
+            pnv_cc->get_pir_tir(chip, pc->hwid + 1, i, &pir, NULL);
+            servers_prop[i * 2 + 1] = cpu_to_be32(pir);
+        }
+        _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+                          servers_prop, sizeof(*servers_prop) * smt_threads
+                                        * 2)));
+    } else {
+        servers_prop = g_new(uint32_t, smt_threads);
+        for (i = 0; i < smt_threads; i++) {
+            pnv_cc->get_pir_tir(chip, pc->hwid, i, &pir, NULL);
+            servers_prop[i] = cpu_to_be32(pir);
+        }
+        _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+                          servers_prop, sizeof(*servers_prop) * smt_threads)));
     }
-    _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
-                       servers_prop, sizeof(*servers_prop) * smt_threads)));
+    g_free(servers_prop);
 
     return offset;
 }
@@ -248,14 +268,17 @@
                        uint32_t nr_threads)
 {
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
-    uint32_t pir = pcc->chip_pir(chip, hwid, 0);
-    uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12);
+    uint32_t pir;
+    uint64_t addr;
     char *name;
     const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
     uint32_t irange[2], i, rsize;
     uint64_t *reg;
     int offset;
 
+    pcc->get_pir_tir(chip, hwid, 0, &pir, NULL);
+    addr = PNV_ICP_BASE(chip) | (pir << 12);
+
     irange[0] = cpu_to_be32(pir);
     irange[1] = cpu_to_be32(nr_threads);
 
@@ -385,6 +408,10 @@
 
         _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
                            pa_features_300, sizeof(pa_features_300))));
+
+        if (pnv_core->big_core) {
+            i++; /* Big-core groups two QEMU cores */
+        }
     }
 
     if (chip->ram_size) {
@@ -446,6 +473,10 @@
 
         _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
                            pa_features_31, sizeof(pa_features_31))));
+
+        if (pnv_core->big_core) {
+            i++; /* Big-core groups two QEMU cores */
+        }
     }
 
     if (chip->ram_size) {
@@ -727,7 +758,8 @@
     Pnv8Chip *chip8 = PNV8_CHIP(chip);
     qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip8->psi), PSIHB_IRQ_EXTERNAL);
 
-    qdev_connect_gpio_out(DEVICE(&chip8->lpc), 0, irq);
+    qdev_connect_gpio_out_named(DEVICE(&chip8->lpc), "LPCHC", 0, irq);
+
     return pnv_lpc_isa_create(&chip8->lpc, true, errp);
 }
 
@@ -736,25 +768,48 @@
     Pnv8Chip *chip8 = PNV8_CHIP(chip);
     qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip8->psi), PSIHB_IRQ_LPC_I2C);
 
-    qdev_connect_gpio_out(DEVICE(&chip8->lpc), 0, irq);
+    qdev_connect_gpio_out_named(DEVICE(&chip8->lpc), "LPCHC", 0, irq);
+
     return pnv_lpc_isa_create(&chip8->lpc, false, errp);
 }
 
 static ISABus *pnv_chip_power9_isa_create(PnvChip *chip, Error **errp)
 {
     Pnv9Chip *chip9 = PNV9_CHIP(chip);
-    qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPCHC);
+    qemu_irq irq;
 
-    qdev_connect_gpio_out(DEVICE(&chip9->lpc), 0, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPCHC);
+    qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "LPCHC", 0, irq);
+
+    irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ0);
+    qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 0, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ1);
+    qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 1, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ2);
+    qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 2, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ3);
+    qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 3, irq);
+
     return pnv_lpc_isa_create(&chip9->lpc, false, errp);
 }
 
 static ISABus *pnv_chip_power10_isa_create(PnvChip *chip, Error **errp)
 {
     Pnv10Chip *chip10 = PNV10_CHIP(chip);
-    qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPCHC);
+    qemu_irq irq;
 
-    qdev_connect_gpio_out(DEVICE(&chip10->lpc), 0, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPCHC);
+    qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "LPCHC", 0, irq);
+
+    irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ0);
+    qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 0, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ1);
+    qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 1, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ2);
+    qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 2, irq);
+    irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ3);
+    qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 3, irq);
+
     return pnv_lpc_isa_create(&chip10->lpc, false, errp);
 }
 
@@ -875,6 +930,7 @@
     PnvMachineState *pnv = PNV_MACHINE(machine);
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     PnvMachineClass *pmc = PNV_MACHINE_GET_CLASS(machine);
+    int max_smt_threads = pmc->max_smt_threads;
     char *fw_filename;
     long fw_size;
     uint64_t chip_ram_start = 0;
@@ -970,20 +1026,52 @@
         exit(1);
     }
 
+    /* Set lpar-per-core mode if lpar-per-thread is not supported */
+    if (!pmc->has_lpar_per_thread) {
+        pnv->lpar_per_core = true;
+    }
+
     pnv->num_chips =
         machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads);
 
-    if (machine->smp.threads > 8) {
-        error_report("Cannot support more than 8 threads/core "
-                     "on a powernv machine");
+    if (pnv->big_core) {
+        if (machine->smp.threads % 2 == 1) {
+            error_report("Cannot support %d threads with big-core option "
+                         "because it must be an even number",
+                         machine->smp.threads);
+            exit(1);
+        }
+        max_smt_threads *= 2;
+    }
+
+    if (machine->smp.threads > max_smt_threads) {
+        error_report("Cannot support more than %d threads/core "
+                     "on %s machine", max_smt_threads, mc->desc);
+        if (pmc->max_smt_threads == 4) {
+            error_report("(use big-core=on for 8 threads per core)");
+        }
         exit(1);
     }
+
+    if (pnv->big_core) {
+        /*
+         * powernv models PnvCore as an SMT4 core. Big-core requires two
+         * PnvCore objects per core, so adjust the topology here. The
+         * pnv_dt_core() processor device-tree and the TCG SMT code make the
+         * two cores appear as one big core from the software point of view.
+         * The pnv pervasive models and XSCOMs tend to see the big core as
+         * two small-core halves.
+         */
+        machine->smp.cores *= 2;
+        machine->smp.threads /= 2;
+    }
+
     if (!is_power_of_2(machine->smp.threads)) {
-        error_report("Cannot support %d threads/core on a powernv"
+        error_report("Cannot support %d threads/core on a powernv "
                      "machine because it must be a power of 2",
                      machine->smp.threads);
         exit(1);
     }
+
     /*
      * TODO: should we decide on how many chips we can create based
      * on #cores and Venice vs. Murano vs. Naples chip type etc...,
@@ -1017,6 +1105,10 @@
                                 &error_fatal);
         object_property_set_int(chip, "nr-threads", machine->smp.threads,
                                 &error_fatal);
+        object_property_set_bool(chip, "big-core", pnv->big_core,
+                                &error_fatal);
+        object_property_set_bool(chip, "lpar-per-core", pnv->lpar_per_core,
+                                &error_fatal);
         /*
          * The POWER8 machine use the XICS interrupt interface.
          * Propagate the XICS fabric to the chip and its controllers.
@@ -1079,10 +1171,16 @@
  *   25:28  Core number
  *   29:31  Thread ID
  */
-static uint32_t pnv_chip_pir_p8(PnvChip *chip, uint32_t core_id,
-                                uint32_t thread_id)
+static void pnv_get_pir_tir_p8(PnvChip *chip,
+                                uint32_t core_id, uint32_t thread_id,
+                                uint32_t *pir, uint32_t *tir)
 {
-    return (chip->chip_id << 7) | (core_id << 3) | thread_id;
+    if (pir) {
+        *pir = (chip->chip_id << 7) | (core_id << 3) | thread_id;
+    }
+    if (tir) {
+        *tir = thread_id;
+    }
 }
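
A worked instance of the P8 layout documented above this function: chip 1,
core 2, thread 3 encodes to PIR 0x93, with the TIR simply equal to the
thread ID:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t chip_id = 1, core_id = 2, thread_id = 3;
        uint32_t pir = (chip_id << 7) | (core_id << 3) | thread_id;

        printf("pir=0x%x tir=%u\n", pir, thread_id);  /* pir=0x93 tir=3 */
        return 0;
    }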
 
 static void pnv_chip_power8_intc_create(PnvChip *chip, PowerPCCPU *cpu,
@@ -1134,14 +1232,26 @@
  *
  * We only care about the lower bits. uint32_t is fine for the moment.
  */
-static uint32_t pnv_chip_pir_p9(PnvChip *chip, uint32_t core_id,
-                                uint32_t thread_id)
+static void pnv_get_pir_tir_p9(PnvChip *chip,
+                                uint32_t core_id, uint32_t thread_id,
+                                uint32_t *pir, uint32_t *tir)
 {
-    if (chip->nr_threads == 8) {
-        return (chip->chip_id << 8) | ((thread_id & 1) << 2) | (core_id << 3) |
-               (thread_id >> 1);
+    if (chip->big_core) {
+        /* Big-core interleaves thread ID between small-cores */
+        thread_id <<= 1;
+        thread_id |= core_id & 1;
+        core_id >>= 1;
+
+        if (pir) {
+            *pir = (chip->chip_id << 8) | (core_id << 3) | thread_id;
+        }
     } else {
-        return (chip->chip_id << 8) | (core_id << 2) | thread_id;
+        if (pir) {
+            *pir = (chip->chip_id << 8) | (core_id << 2) | thread_id;
+        }
+    }
+    if (tir) {
+        *tir = thread_id;
     }
 }
 
@@ -1156,14 +1266,26 @@
  *
  * We only care about the lower bits. uint32_t is fine for the moment.
  */
-static uint32_t pnv_chip_pir_p10(PnvChip *chip, uint32_t core_id,
-                                 uint32_t thread_id)
+static void pnv_get_pir_tir_p10(PnvChip *chip,
+                                uint32_t core_id, uint32_t thread_id,
+                                uint32_t *pir, uint32_t *tir)
 {
-    if (chip->nr_threads == 8) {
-        return (chip->chip_id << 8) | ((core_id / 4) << 4) |
-               ((core_id % 2) << 3) | thread_id;
+    if (chip->big_core) {
+        /* Big-core interleaves thread ID between small-cores */
+        thread_id <<= 1;
+        thread_id |= core_id & 1;
+        core_id >>= 1;
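+        /* e.g. small-core core_id=3, thread_id=0 becomes core 1, thread 1 */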
+
+        if (pir) {
+            *pir = (chip->chip_id << 8) | (core_id << 3) | thread_id;
+        }
     } else {
-        return (chip->chip_id << 8) | (core_id << 2) | thread_id;
+        if (pir) {
+            *pir = (chip->chip_id << 8) | (core_id << 2) | thread_id;
+        }
+    }
+    if (tir) {
+        *tir = thread_id;
     }
 }
 
@@ -1343,8 +1465,11 @@
         int core_hwid = CPU_CORE(pnv_core)->core_id;
 
         for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) {
-            uint32_t pir = pcc->chip_pir(chip, core_hwid, j);
-            PnvICPState *icp = PNV_ICP(xics_icp_get(chip8->xics, pir));
+            uint32_t pir;
+            PnvICPState *icp;
+
+            pcc->get_pir_tir(chip, core_hwid, j, &pir, NULL);
+            icp = PNV_ICP(xics_icp_get(chip8->xics, pir));
 
             memory_region_add_subregion(&chip8->icp_mmio, pir << 12,
                                         &icp->mmio);
@@ -1456,7 +1581,7 @@
     k->chip_cfam_id = 0x221ef04980000000ull;  /* P8 Murano DD2.1 */
     k->cores_mask = POWER8E_CORE_MASK;
     k->num_phbs = 3;
-    k->chip_pir = pnv_chip_pir_p8;
+    k->get_pir_tir = pnv_get_pir_tir_p8;
     k->intc_create = pnv_chip_power8_intc_create;
     k->intc_reset = pnv_chip_power8_intc_reset;
     k->intc_destroy = pnv_chip_power8_intc_destroy;
@@ -1480,7 +1605,7 @@
     k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
     k->cores_mask = POWER8_CORE_MASK;
     k->num_phbs = 3;
-    k->chip_pir = pnv_chip_pir_p8;
+    k->get_pir_tir = pnv_get_pir_tir_p8;
     k->intc_create = pnv_chip_power8_intc_create;
     k->intc_reset = pnv_chip_power8_intc_reset;
     k->intc_destroy = pnv_chip_power8_intc_destroy;
@@ -1504,7 +1629,7 @@
     k->chip_cfam_id = 0x120d304980000000ull;  /* P8 Naples DD1.0 */
     k->cores_mask = POWER8_CORE_MASK;
     k->num_phbs = 4;
-    k->chip_pir = pnv_chip_pir_p8;
+    k->get_pir_tir = pnv_get_pir_tir_p8;
     k->intc_create = pnv_chip_power8_intc_create;
     k->intc_reset = pnv_chip_power8_intc_reset;
     k->intc_destroy = pnv_chip_power8_intc_destroy;
@@ -1527,6 +1652,7 @@
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
     int i;
 
+    object_initialize_child(obj, "adu",  &chip9->adu, TYPE_PNV_ADU);
     object_initialize_child(obj, "xive", &chip9->xive, TYPE_PNV_XIVE);
     object_property_add_alias(obj, "xive-fabric", OBJECT(&chip9->xive),
                               "xive-fabric");
@@ -1637,6 +1763,15 @@
         return;
     }
 
+    /* ADU */
+    object_property_set_link(OBJECT(&chip9->adu), "lpc", OBJECT(&chip9->lpc),
+                             &error_abort);
+    if (!qdev_realize(DEVICE(&chip9->adu), NULL, errp)) {
+        return;
+    }
+    pnv_xscom_add_subregion(chip, PNV9_XSCOM_ADU_BASE,
+                            &chip9->adu.xscom_regs);
+
     pnv_chip_quad_realize(chip9, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -1777,7 +1912,7 @@
 
     k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */
     k->cores_mask = POWER9_CORE_MASK;
-    k->chip_pir = pnv_chip_pir_p9;
+    k->get_pir_tir = pnv_get_pir_tir_p9;
     k->intc_create = pnv_chip_power9_intc_create;
     k->intc_reset = pnv_chip_power9_intc_reset;
     k->intc_destroy = pnv_chip_power9_intc_destroy;
@@ -1803,6 +1938,7 @@
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
     int i;
 
+    object_initialize_child(obj, "adu",  &chip10->adu, TYPE_PNV_ADU);
     object_initialize_child(obj, "xive", &chip10->xive, TYPE_PNV_XIVE2);
     object_property_add_alias(obj, "xive-fabric", OBJECT(&chip10->xive),
                               "xive-fabric");
@@ -1826,6 +1962,11 @@
     for (i = 0; i < pcc->i2c_num_engines; i++) {
         object_initialize_child(obj, "i2c[*]", &chip10->i2c[i], TYPE_PNV_I2C);
     }
+
+    for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) {
+        object_initialize_child(obj, "pib_spic[*]", &chip10->pib_spic[i],
+                                TYPE_PNV_SPI);
+    }
 }
 
 static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp)
@@ -1895,6 +2036,15 @@
         return;
     }
 
+    /* ADU */
+    object_property_set_link(OBJECT(&chip10->adu), "lpc", OBJECT(&chip10->lpc),
+                             &error_abort);
+    if (!qdev_realize(DEVICE(&chip10->adu), NULL, errp)) {
+        return;
+    }
+    pnv_xscom_add_subregion(chip, PNV10_XSCOM_ADU_BASE,
+                            &chip10->adu.xscom_regs);
+
     pnv_chip_power10_quad_realize(chip10, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -2040,7 +2190,21 @@
                               qdev_get_gpio_in(DEVICE(&chip10->psi),
                                                PSIHB9_IRQ_SBE_I2C));
     }
-
+    /* PIB SPI Controller */
+    for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) {
+        object_property_set_int(OBJECT(&chip10->pib_spic[i]), "spic_num",
+                                i, &error_fatal);
+        /* pib_spic[2] is connected to a 25csm04, which uses 1-byte transfers */
+        object_property_set_int(OBJECT(&chip10->pib_spic[i]), "transfer_len",
+                                (i == 2) ? 1 : 4, &error_fatal);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&chip10->pib_spic[i]), errp)) {
+            return;
+        }
+        pnv_xscom_add_subregion(chip, PNV10_XSCOM_PIB_SPIC_BASE +
+                                i * PNV10_XSCOM_PIB_SPIC_SIZE,
+                                &chip10->pib_spic[i].xscom_spic_regs);
+    }
 }
 
 static void pnv_rainier_i2c_init(PnvMachineState *pnv)
@@ -2087,9 +2251,9 @@
     PnvChipClass *k = PNV_CHIP_CLASS(klass);
     static const int i2c_ports_per_engine[PNV10_CHIP_MAX_I2C] = {14, 14, 2, 16};
 
-    k->chip_cfam_id = 0x120da04900008000ull; /* P10 DD1.0 (with NX) */
+    k->chip_cfam_id = 0x220da04980000000ull; /* P10 DD2.0 (with NX) */
     k->cores_mask = POWER10_CORE_MASK;
-    k->chip_pir = pnv_chip_pir_p10;
+    k->get_pir_tir = pnv_get_pir_tir_p10;
     k->intc_create = pnv_chip_power10_intc_create;
     k->intc_reset = pnv_chip_power10_intc_reset;
     k->intc_destroy = pnv_chip_power10_intc_destroy;
@@ -2108,7 +2272,8 @@
                                     &k->parent_realize);
 }
 
-static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp)
+static void pnv_chip_core_sanitize(PnvMachineState *pnv, PnvChip *chip,
+                                   Error **errp)
 {
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
     int cores_max;
@@ -2129,6 +2294,17 @@
     }
     chip->cores_mask &= pcc->cores_mask;
 
+    /* Ensure small-cores are paired up in big-core mode */
+    if (pnv->big_core) {
+        uint64_t even_cores = chip->cores_mask & 0x5555555555555555ULL;
+        uint64_t odd_cores = chip->cores_mask & 0xaaaaaaaaaaaaaaaaULL;
+
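+        /*
+         * e.g. cores_mask 0b0111: even = 0b0101, odd = 0b0010, and
+         * 0b0101 ^ (0b0010 >> 1) = 0b0100 flags the unpaired core 2
+         */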
+        if (even_cores ^ (odd_cores >> 1)) {
+            error_setg(errp, "unpaired cores in big-core mode");
+            return;
+        }
+    }
+
     /* now that we have a sane layout, let's check the number of cores */
     cores_max = ctpop64(chip->cores_mask);
     if (chip->nr_cores > cores_max) {
@@ -2140,11 +2316,12 @@
 
 static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
 {
+    PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+    PnvMachineClass *pmc = PNV_MACHINE_GET_CLASS(pnv);
     Error *error = NULL;
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
     const char *typename = pnv_chip_core_typename(chip);
     int i, core_hwid;
-    PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
 
     if (!object_class_by_name(typename)) {
         error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
@@ -2152,7 +2329,7 @@
     }
 
     /* Cores */
-    pnv_chip_core_sanitize(chip, &error);
+    pnv_chip_core_sanitize(pnv, chip, &error);
     if (error) {
         error_propagate(errp, error);
         return;
@@ -2183,8 +2360,15 @@
                                 &error_fatal);
         object_property_set_int(OBJECT(pnv_core), "hrmor", pnv->fw_load_addr,
                                 &error_fatal);
+        object_property_set_bool(OBJECT(pnv_core), "big-core", chip->big_core,
+                                &error_fatal);
+        object_property_set_bool(OBJECT(pnv_core), "quirk-tb-big-core",
+                                pmc->quirk_tb_big_core, &error_fatal);
+        object_property_set_bool(OBJECT(pnv_core), "lpar-per-core",
+                                chip->lpar_per_core, &error_fatal);
         object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip),
                                  &error_abort);
+
         qdev_realize(DEVICE(pnv_core), NULL, &error_fatal);
 
         /* Each core has an XSCOM MMIO region */
@@ -2216,6 +2400,8 @@
     DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
     DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
     DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1),
+    DEFINE_PROP_BOOL("big-core", PnvChip, big_core, false),
+    DEFINE_PROP_BOOL("lpar-per-core", PnvChip, lpar_per_core, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2424,6 +2610,46 @@
     return total_count;
 }
 
+static bool pnv_machine_get_big_core(Object *obj, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+    return pnv->big_core;
+}
+
+static void pnv_machine_set_big_core(Object *obj, bool value, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+    pnv->big_core = value;
+}
+
+static bool pnv_machine_get_lpar_per_core(Object *obj, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+    return pnv->lpar_per_core;
+}
+
+static void pnv_machine_set_lpar_per_core(Object *obj, bool value, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+    pnv->lpar_per_core = value;
+}
+
+static bool pnv_machine_get_hb(Object *obj, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+
+    return !!pnv->fw_load_addr;
+}
+
+static void pnv_machine_set_hb(Object *obj, bool value, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+
+    if (value) {
+        pnv->fw_load_addr = 0x8000000;
+    }
+}
+
 static void pnv_machine_power8_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -2446,6 +2672,9 @@
 
     pmc->compat = compat;
     pmc->compat_size = sizeof(compat);
+    pmc->max_smt_threads = 8;
+    /* POWER8 is always lpar-per-core mode */
+    pmc->has_lpar_per_thread = false;
 
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
 }
@@ -2470,9 +2699,23 @@
 
     pmc->compat = compat;
     pmc->compat_size = sizeof(compat);
+    pmc->max_smt_threads = 4;
+    pmc->has_lpar_per_thread = true;
     pmc->dt_power_mgt = pnv_dt_power_mgt;
 
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
+
+    object_class_property_add_bool(oc, "big-core",
+                                   pnv_machine_get_big_core,
+                                   pnv_machine_set_big_core);
+    object_class_property_set_description(oc, "big-core",
+                              "Use big-core (aka fused-core) mode");
+
+    object_class_property_add_bool(oc, "lpar-per-core",
+                                   pnv_machine_get_lpar_per_core,
+                                   pnv_machine_set_lpar_per_core);
+    object_class_property_set_description(oc, "lpar-per-core",
+                              "Use 1 LPAR per core mode");
 }
 
 static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
@@ -2494,6 +2737,9 @@
 
     pmc->compat = compat;
     pmc->compat_size = sizeof(compat);
+    pmc->max_smt_threads = 4;
+    pmc->has_lpar_per_thread = true;
+    pmc->quirk_tb_big_core = true;
     pmc->dt_power_mgt = pnv_dt_power_mgt;
 
     xfc->match_nvt = pnv10_xive_match_nvt;
@@ -2507,6 +2753,23 @@
 
     pnv_machine_p10_common_class_init(oc, data);
     mc->desc = "IBM PowerNV (Non-Virtualized) POWER10";
+
+    /*
+     * This is the parent of the POWER10 Rainier class, so properties go
+     * here rather than in the common init (which would add them to both
+     * parent and child, which is invalid).
+     */
+    object_class_property_add_bool(oc, "big-core",
+                                   pnv_machine_get_big_core,
+                                   pnv_machine_set_big_core);
+    object_class_property_set_description(oc, "big-core",
+                              "Use big-core (aka fused-core) mode");
+
+    object_class_property_add_bool(oc, "lpar-per-core",
+                                   pnv_machine_get_lpar_per_core,
+                                   pnv_machine_set_lpar_per_core);
+    object_class_property_set_description(oc, "lpar-per-core",
+                              "Use 1 LPAR per core mode");
 }
 
 static void pnv_machine_p10_rainier_class_init(ObjectClass *oc, void *data)
@@ -2519,22 +2782,6 @@
     pmc->i2c_init = pnv_rainier_i2c_init;
 }
 
-static bool pnv_machine_get_hb(Object *obj, Error **errp)
-{
-    PnvMachineState *pnv = PNV_MACHINE(obj);
-
-    return !!pnv->fw_load_addr;
-}
-
-static void pnv_machine_set_hb(Object *obj, bool value, Error **errp)
-{
-    PnvMachineState *pnv = PNV_MACHINE(obj);
-
-    if (value) {
-        pnv->fw_load_addr = 0x8000000;
-    }
-}
-
 static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg)
 {
     CPUPPCState *env = cpu_env(cs);
@@ -2561,11 +2808,23 @@
          */
         env->spr[SPR_SRR1] |= SRR1_WAKESCOM;
     }
+    if (arg.host_int == 1) {
+        cpu_resume(cs);
+    }
+}
+
+/*
+ * Send a SRESET (NMI) interrupt to the CPU, and resume execution if it was
+ * paused.
+ */
+void pnv_cpu_do_nmi_resume(CPUState *cs)
+{
+    async_run_on_cpu(cs, pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_HOST_INT(1));
 }
 
 static void pnv_cpu_do_nmi(PnvChip *chip, PowerPCCPU *cpu, void *opaque)
 {
-    async_run_on_cpu(CPU(cpu), pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL);
+    async_run_on_cpu(CPU(cpu), pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_HOST_INT(0));
 }
 
 static void pnv_nmi(NMIState *n, int cpu_index, Error **errp)
diff --git a/hw/ppc/pnv_adu.c b/hw/ppc/pnv_adu.c
new file mode 100644
index 0000000..81b7d6e
--- /dev/null
+++ b/hw/ppc/pnv_adu.c
@@ -0,0 +1,206 @@
+/*
+ * QEMU PowerPC PowerNV ADU unit
+ *
+ * The ADU unit actually implements XSCOM, which is the bridge between MMIO
+ * and PIB. However it also includes control and status registers and other
+ * functions that are exposed as PIB (xscom) registers.
+ *
+ * To keep things simple, pnv_xscom.c remains the XSCOM bridge
+ * implementation, and pnv_adu.c implements the ADU registers and other
+ * functions.
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_adu.h"
+#include "hw/ppc/pnv_chip.h"
+#include "hw/ppc/pnv_lpc.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "trace.h"
+
+#define ADU_LPC_BASE_REG     0x40
+#define ADU_LPC_CMD_REG      0x41
+#define ADU_LPC_DATA_REG     0x42
+#define ADU_LPC_STATUS_REG   0x43
+
+static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr addr, unsigned width)
+{
+    PnvADU *adu = PNV_ADU(opaque);
+    uint32_t offset = addr >> 3;
+    uint64_t val = 0;
+
+    switch (offset) {
+    case 0x18:     /* Receive status reg */
+    case 0x12:     /* log register */
+    case 0x13:     /* error register */
+        break;
+    case ADU_LPC_BASE_REG:
+        /*
+         * LPC Address Map in Pervasive ADU Workbook
+         *
+         * return PNV10_LPCM_BASE(chip) & PPC_BITMASK(8, 31);
+         * XXX: implement as class property, or get from LPC?
+         */
+        qemu_log_mask(LOG_UNIMP, "ADU: LPC_BASE_REG is not implemented\n");
+        break;
+    case ADU_LPC_CMD_REG:
+        val = adu->lpc_cmd_reg;
+        break;
+    case ADU_LPC_DATA_REG:
+        val = adu->lpc_data_reg;
+        break;
+    case ADU_LPC_STATUS_REG:
+        val = PPC_BIT(0); /* ack / done */
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP, "ADU Unimplemented read register: Ox%08x\n",
+                                                                     offset);
+    }
+
+    trace_pnv_adu_xscom_read(addr, val);
+
+    return val;
+}
+
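+/*
+ * LPC command register layout, as decoded by the helpers below: IBM bit 0
+ * selects read (1) vs. write (0), bits 5:11 hold the transfer size and
+ * bits 32:63 the LPC address.
+ */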
+static bool lpc_cmd_read(PnvADU *adu)
+{
+    return !!(adu->lpc_cmd_reg & PPC_BIT(0));
+}
+
+static bool lpc_cmd_write(PnvADU *adu)
+{
+    return !lpc_cmd_read(adu);
+}
+
+static uint32_t lpc_cmd_addr(PnvADU *adu)
+{
+    return (adu->lpc_cmd_reg & PPC_BITMASK(32, 63)) >> PPC_BIT_NR(63);
+}
+
+static uint32_t lpc_cmd_size(PnvADU *adu)
+{
+    return (adu->lpc_cmd_reg & PPC_BITMASK(5, 11)) >> PPC_BIT_NR(11);
+}
+
+static void pnv_adu_xscom_write(void *opaque, hwaddr addr, uint64_t val,
+                                unsigned width)
+{
+    PnvADU *adu = PNV_ADU(opaque);
+    uint32_t offset = addr >> 3;
+
+    trace_pnv_adu_xscom_write(addr, val);
+
+    switch (offset) {
+    case 0x18:     /* Receive status reg */
+    case 0x12:     /* log register */
+    case 0x13:     /* error register */
+        break;
+
+    case ADU_LPC_BASE_REG:
+        qemu_log_mask(LOG_UNIMP,
+                      "ADU: Changing LPC_BASE_REG is not implemented\n");
+        break;
+
+    case ADU_LPC_CMD_REG:
+        adu->lpc_cmd_reg = val;
+        if (lpc_cmd_read(adu)) {
+            uint32_t lpc_addr = lpc_cmd_addr(adu);
+            uint32_t lpc_size = lpc_cmd_size(adu);
+            uint64_t data = 0;
+
+            pnv_lpc_opb_read(adu->lpc, lpc_addr, (void *)&data, lpc_size);
+
+            /*
+             * ADU access is performed within 8-byte aligned sectors. Smaller
+             * access sizes don't get formatted to the least significant byte,
+             * but rather appear in the data reg at the same offset as the
+             * address in memory. This shifts them into that position.
+             */
+            adu->lpc_data_reg = be64_to_cpu(data) >> ((lpc_addr & 7) * 8);
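+            /* e.g. a 1-byte read at (lpc_addr & 7) == 3 lands in byte lane 3 */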
+        }
+        break;
+
+    case ADU_LPC_DATA_REG:
+        adu->lpc_data_reg = val;
+        if (lpc_cmd_write(adu)) {
+            uint32_t lpc_addr = lpc_cmd_addr(adu);
+            uint32_t lpc_size = lpc_cmd_size(adu);
+            uint64_t data;
+
+            data = cpu_to_be64(val) >> ((lpc_addr & 7) * 8); /* See above */
+            pnv_lpc_opb_write(adu->lpc, lpc_addr, (void *)&data, lpc_size);
+        }
+        break;
+
+    case ADU_LPC_STATUS_REG:
+        qemu_log_mask(LOG_UNIMP,
+                      "ADU: Changing LPC_STATUS_REG is not implemented\n");
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP, "ADU Unimplemented write register: Ox%08x\n",
+                                                                     offset);
+    }
+}
+
+const MemoryRegionOps pnv_adu_xscom_ops = {
+    .read = pnv_adu_xscom_read,
+    .write = pnv_adu_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_adu_realize(DeviceState *dev, Error **errp)
+{
+    PnvADU *adu = PNV_ADU(dev);
+
+    assert(adu->lpc);
+
+    /* XScom regions for ADU registers */
+    pnv_xscom_region_init(&adu->xscom_regs, OBJECT(dev),
+                          &pnv_adu_xscom_ops, adu, "xscom-adu",
+                          PNV9_XSCOM_ADU_SIZE);
+}
+
+static Property pnv_adu_properties[] = {
+    DEFINE_PROP_LINK("lpc", PnvADU, lpc, TYPE_PNV_LPC, PnvLpcController *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_adu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = pnv_adu_realize;
+    dc->desc = "PowerNV ADU";
+    device_class_set_props(dc, pnv_adu_properties);
+    dc->user_creatable = false;
+}
+
+static const TypeInfo pnv_adu_type_info = {
+    .name          = TYPE_PNV_ADU,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(PnvADU),
+    .class_init    = pnv_adu_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { } },
+};
+
+static void pnv_adu_register_types(void)
+{
+    type_register_static(&pnv_adu_type_info);
+}
+
+type_init(pnv_adu_register_types);
diff --git a/hw/ppc/pnv_chiptod.c b/hw/ppc/pnv_chiptod.c
index 3831a72..1e41fe5 100644
--- a/hw/ppc/pnv_chiptod.c
+++ b/hw/ppc/pnv_chiptod.c
@@ -364,8 +364,7 @@
             qemu_log_mask(LOG_GUEST_ERROR, "pnv_chiptod: xscom write reg"
                           " TOD_MOVE_TOD_TO_TB_REG with no slave target\n");
         } else {
-            PowerPCCPU *cpu = chiptod->slave_pc_target->threads[0];
-            CPUPPCState *env = &cpu->env;
+            PnvCore *pc = chiptod->slave_pc_target;
 
             /*
              * Moving TOD to TB will set the TB of all threads in a
@@ -377,8 +376,8 @@
              * thread 0.
              */
 
-            if (env->pnv_tod_tbst.tb_ready_for_tod) {
-                env->pnv_tod_tbst.tod_sent_to_tb = 1;
+            if (pc->tod_state.tb_ready_for_tod) {
+                pc->tod_state.tod_sent_to_tb = 1;
             } else {
                 qemu_log_mask(LOG_GUEST_ERROR, "pnv_chiptod: xscom write reg"
                               " TOD_MOVE_TOD_TO_TB_REG with TB not ready to"
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index f40ab72..a306939 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -58,6 +58,10 @@
     env->nip = 0x10;
     env->msr |= MSR_HVB; /* Hypervisor mode */
     env->spr[SPR_HRMOR] = pc->hrmor;
+    if (pc->big_core) {
+        /* Clear "small core" bit on Power9/10 (this is set in default PVR) */
+        env->spr[SPR_PVR] &= ~PPC_BIT(51);
+    }
     hreg_compute_hflags(env);
     ppc_maybe_interrupt(env);
 
@@ -181,16 +185,43 @@
  */
 
 #define PNV10_XSCOM_EC_CORE_THREAD_STATE    0x412
+#define PNV10_XSCOM_EC_CORE_THREAD_INFO     0x413
+#define PNV10_XSCOM_EC_CORE_DIRECT_CONTROLS 0x449
+#define PNV10_XSCOM_EC_CORE_RAS_STATUS      0x454
 
 static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr,
                                            unsigned int width)
 {
+    PnvCore *pc = PNV_CORE(opaque);
+    int nr_threads = CPU_CORE(pc)->nr_threads;
+    int i;
     uint32_t offset = addr >> 3;
     uint64_t val = 0;
 
     switch (offset) {
     case PNV10_XSCOM_EC_CORE_THREAD_STATE:
-        val = 0;
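+        /*
+         * Thread i halted sets PPC_BIT(56 + i); e.g. four halted threads
+         * yield 0xf0 in the least-significant byte.
+         */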
+        for (i = 0; i < nr_threads; i++) {
+            PowerPCCPU *cpu = pc->threads[i];
+            CPUState *cs = CPU(cpu);
+
+            if (cs->halted) {
+                val |= PPC_BIT(56 + i);
+            }
+        }
+        if (pc->lpar_per_core) {
+            val |= PPC_BIT(62);
+        }
+        break;
+    case PNV10_XSCOM_EC_CORE_THREAD_INFO:
+        break;
+    case PNV10_XSCOM_EC_CORE_RAS_STATUS:
+        for (i = 0; i < nr_threads; i++) {
+            PowerPCCPU *cpu = pc->threads[i];
+            CPUState *cs = CPU(cpu);
+            if (cs->stopped) {
+                val |= PPC_BIT(0 + 8 * i) | PPC_BIT(1 + 8 * i);
+            }
+        }
         break;
     default:
         qemu_log_mask(LOG_UNIMP, "%s: unimp read 0x%08x\n", __func__,
@@ -203,9 +234,46 @@
 static void pnv_core_power10_xscom_write(void *opaque, hwaddr addr,
                                          uint64_t val, unsigned int width)
 {
+    PnvCore *pc = PNV_CORE(opaque);
+    int nr_threads = CPU_CORE(pc)->nr_threads;
+    int i;
     uint32_t offset = addr >> 3;
 
     switch (offset) {
+    case PNV10_XSCOM_EC_CORE_DIRECT_CONTROLS:
+        for (i = 0; i < nr_threads; i++) {
+            PowerPCCPU *cpu = pc->threads[i];
+            CPUState *cs = CPU(cpu);
+
+            if (val & PPC_BIT(7 + 8 * i)) { /* stop */
+                val &= ~PPC_BIT(7 + 8 * i);
+                cpu_pause(cs);
+            }
+            if (val & PPC_BIT(6 + 8 * i)) { /* start */
+                val &= ~PPC_BIT(6 + 8 * i);
+                cpu_resume(cs);
+            }
+            if (val & PPC_BIT(4 + 8 * i)) { /* sreset */
+                val &= ~PPC_BIT(4 + 8 * i);
+                pnv_cpu_do_nmi_resume(cs);
+            }
+            if (val & PPC_BIT(3 + 8 * i)) { /* clear maint */
+                /*
+                 * Hardware has very particular rules about when clear maint
+                 * versus start must be used to resume a thread. These are
+                 * not modelled exactly; just treat clear maint the same as
+                 * start.
+                 */
+                val &= ~PPC_BIT(3 + 8 * i);
+                cpu_resume(cs);
+            }
+        }
+        if (val) {
+            qemu_log_mask(LOG_UNIMP, "%s: unimp bits in DIRECT_CONTROLS "
+                                     "0x%016" PRIx64 "\n", __func__, val);
+        }
+        break;
+
     default:
         qemu_log_mask(LOG_UNIMP, "%s: unimp write 0x%08x\n", __func__,
                       offset);
@@ -227,8 +295,9 @@
 {
     CPUPPCState *env = &cpu->env;
     int core_hwid;
-    ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
-    ppc_spr_t *tir = &env->spr_cb[SPR_TIR];
+    ppc_spr_t *pir_spr = &env->spr_cb[SPR_PIR];
+    ppc_spr_t *tir_spr = &env->spr_cb[SPR_TIR];
+    uint32_t pir, tir;
     Error *local_err = NULL;
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
 
@@ -244,8 +313,20 @@
 
     core_hwid = object_property_get_uint(OBJECT(pc), "hwid", &error_abort);
 
-    tir->default_value = thread_index;
-    pir->default_value = pcc->chip_pir(pc->chip, core_hwid, thread_index);
+    pcc->get_pir_tir(pc->chip, core_hwid, thread_index, &pir, &tir);
+    pir_spr->default_value = pir;
+    tir_spr->default_value = tir;
+
+    if (pc->big_core) {
+        /* 2 "small cores" get the same core index for SMT operations */
+        env->core_index = core_hwid >> 1;
+    } else {
+        env->core_index = core_hwid;
+    }
+
+    if (pc->lpar_per_core) {
+        cpu_ppc_set_1lpar(cpu);
+    }
 
     /* Set time-base frequency to 512 MHz */
     cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
@@ -278,16 +359,22 @@
     pc->threads = g_new(PowerPCCPU *, cc->nr_threads);
     for (i = 0; i < cc->nr_threads; i++) {
         PowerPCCPU *cpu;
+        PnvCPUState *pnv_cpu;
 
         obj = object_new(typename);
         cpu = POWERPC_CPU(obj);
 
         pc->threads[i] = POWERPC_CPU(obj);
+        if (cc->nr_threads > 1) {
+            cpu->env.has_smt_siblings = true;
+        }
 
         snprintf(name, sizeof(name), "thread[%d]", i);
         object_property_add_child(OBJECT(pc), name, obj);
 
         cpu->machine_data = g_new0(PnvCPUState, 1);
+        pnv_cpu = pnv_cpu_state(cpu);
+        pnv_cpu->pnv_core = pc;
 
         object_unref(obj);
     }
@@ -344,6 +431,10 @@
 static Property pnv_core_properties[] = {
     DEFINE_PROP_UINT32("hwid", PnvCore, hwid, 0),
     DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0),
+    DEFINE_PROP_BOOL("big-core", PnvCore, big_core, false),
+    DEFINE_PROP_BOOL("quirk-tb-big-core", PnvCore, tod_state.big_core_quirk,
+                     false),
+    DEFINE_PROP_BOOL("lpar-per-core", PnvCore, lpar_per_core, false),
     DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -504,6 +595,7 @@
 static uint64_t pnv_qme_power10_xscom_read(void *opaque, hwaddr addr,
                                             unsigned int width)
 {
+    PnvQuad *eq = PNV_QUAD(opaque);
     uint32_t offset = addr >> 3;
     uint64_t val = -1;
 
@@ -511,10 +603,14 @@
      * Fourth nibble selects the core within a quad; mask it to process
      * reads for any core.
      */
-    switch (offset & ~0xf000) {
-    case P10_QME_SPWU_HYP:
+    switch (offset & ~PPC_BITMASK32(16, 19)) {
     case P10_QME_SSH_HYP:
-        return 0;
+        val = 0;
+        if (eq->special_wakeup_done) {
+            val |= PPC_BIT(1); /* SPWU DONE */
+            val |= PPC_BIT(4); /* SSH SPWU DONE */
+        }
+        break;
     default:
         qemu_log_mask(LOG_UNIMP, "%s: unimp read 0x%08x\n", __func__,
                       offset);
@@ -526,9 +622,22 @@
 static void pnv_qme_power10_xscom_write(void *opaque, hwaddr addr,
                                          uint64_t val, unsigned int width)
 {
+    PnvQuad *eq = PNV_QUAD(opaque);
     uint32_t offset = addr >> 3;
+    bool set;
+    int i;
 
-    switch (offset) {
+    switch (offset & ~PPC_BITMASK32(16, 19)) {
+    case P10_QME_SPWU_HYP:
+        set = !!(val & PPC_BIT(0));
+        eq->special_wakeup_done = set;
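+        /* e.g. PPC_BIT32(16) set in the offset selects core 0 of the quad */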
+        for (i = 0; i < 4; i++) {
+            /* These bits select cores in the quad */
+            if (offset & PPC_BIT32(16 + i)) {
+                eq->special_wakeup[i] = set;
+            }
+        }
+        break;
     default:
         qemu_log_mask(LOG_UNIMP, "%s: unimp write 0x%08x\n", __func__,
                       offset);
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index d692858..f8aad95 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -64,6 +64,7 @@
 #define   LPC_HC_IRQSER_START_4CLK      0x00000000
 #define   LPC_HC_IRQSER_START_6CLK      0x01000000
 #define   LPC_HC_IRQSER_START_8CLK      0x02000000
+#define   LPC_HC_IRQSER_AUTO_CLEAR      0x00800000
 #define LPC_HC_IRQMASK          0x34    /* same bit defs as LPC_HC_IRQSTAT */
 #define LPC_HC_IRQSTAT          0x38
 #define   LPC_HC_IRQ_SERIRQ0            0x80000000 /* all bits down to ... */
@@ -235,16 +236,16 @@
  * TODO: rework to use address_space_stq() and address_space_ldq()
  * instead.
  */
-static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data,
-                     int sz)
+bool pnv_lpc_opb_read(PnvLpcController *lpc, uint32_t addr,
+                      uint8_t *data, int sz)
 {
     /* XXX Handle access size limits and FW read caching here */
     return !address_space_read(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED,
                                data, sz);
 }
 
-static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data,
-                      int sz)
+bool pnv_lpc_opb_write(PnvLpcController *lpc, uint32_t addr,
+                       uint8_t *data, int sz)
 {
     /* XXX Handle access size limits here */
     return !address_space_write(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED,
@@ -276,7 +277,7 @@
     }
 
     if (cmd & ECCB_CTL_READ) {
-        success = opb_read(lpc, opb_addr, data, sz);
+        success = pnv_lpc_opb_read(lpc, opb_addr, data, sz);
         if (success) {
             lpc->eccb_stat_reg = ECCB_STAT_OP_DONE |
                     (((uint64_t)data[0]) << 24 |
@@ -293,7 +294,7 @@
         data[2] = lpc->eccb_data_reg >>  8;
         data[3] = lpc->eccb_data_reg;
 
-        success = opb_write(lpc, opb_addr, data, sz);
+        success = pnv_lpc_opb_write(lpc, opb_addr, data, sz);
         lpc->eccb_stat_reg = ECCB_STAT_OP_DONE;
     }
     /* XXX Which error bit (if any) to signal OPB error ? */
@@ -420,32 +421,90 @@
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
+/* Program the POWER9 LPC irq to PSI serirq routing table */
+static void pnv_lpc_eval_serirq_routes(PnvLpcController *lpc)
+{
+    int irq;
+
+    if (!lpc->psi_has_serirq) {
+        if ((lpc->opb_irq_route0 & PPC_BITMASK32(8, 13)) ||
+            (lpc->opb_irq_route1 & PPC_BITMASK32(4, 31))) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                "OPB: setting serirq routing on POWER8 system, ignoring.\n");
+        }
+        return;
+    }
+
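+    /*
+     * Each irq has a 2-bit route selecting one of the 4 SERIRQ lines;
+     * e.g. irq 0 is encoded in IBM bits 4:5 of route1.
+     */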
+    for (irq = 0; irq <= 13; irq++) {
+        int serirq = (lpc->opb_irq_route1 >> (31 - 5 - (irq * 2))) & 0x3;
+        lpc->irq_to_serirq_route[irq] = serirq;
+    }
+
+    for (irq = 14; irq < ISA_NUM_IRQS; irq++) {
+        int serirq = (lpc->opb_irq_route0 >> (31 - 9 - ((irq - 14) * 2))) & 0x3;
+        lpc->irq_to_serirq_route[irq] = serirq;
+    }
+}
+
 static void pnv_lpc_eval_irqs(PnvLpcController *lpc)
 {
-    bool lpc_to_opb_irq = false;
+    uint32_t active_irqs = 0;
 
-    /* Update LPC controller to OPB line */
+    if (lpc->lpc_hc_irqstat & PPC_BITMASK32(16, 31)) {
+        qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented irqs in IRQSTAT: "
+                                 "0x%08"PRIx32"\n", lpc->lpc_hc_irqstat);
+    }
+
     if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) {
-        uint32_t irqs;
-
-        irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask;
-        lpc_to_opb_irq = (irqs != 0);
+        active_irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask;
     }
 
-    /* We don't honor the polarity register, it's pointless and unused
-     * anyway
-     */
-    if (lpc_to_opb_irq) {
-        lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC;
-    } else {
-        lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC;
-    }
-
-    /* Update OPB internal latch */
-    lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask;
-
     /* Reflect the interrupt */
-    qemu_set_irq(lpc->psi_irq, lpc->opb_irq_stat != 0);
+    if (!lpc->psi_has_serirq) {
+        /*
+         * POWER8 ORs all irqs together (also with LPCHC internal interrupt
+         * sources) and outputs a single line that raises the PSI LPCHC irq
+         * which then latches an OPB IRQ status register that sends the irq
+         * to PSI.
+         *
+         * We don't honor the polarity register; it's pointless and unused
+         * anyway.
+         */
+        if (active_irqs) {
+            lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC;
+        } else {
+            lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC;
+        }
+
+        /* Update OPB internal latch */
+        lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask;
+
+        qemu_set_irq(lpc->psi_irq_lpchc, lpc->opb_irq_stat != 0);
+    } else {
+        /*
+         * POWER9 and POWER10 have routing fields in OPB master registers that
+         * send LPC irqs to 4 output lines that raise the PSI SERIRQ irqs.
+         * These don't appear to get latched into an OPB register like the
+         * LPCHC irqs.
+         *
+         * POWER9 LPC controller internal irqs still go via the OPB
+         * and LPCHC PSI irqs like P8, but we have no such internal sources
+         * modelled yet.
+         */
+        bool serirq_out[4] = { false, false, false, false };
+        int irq;
+
+        for (irq = 0; irq < ISA_NUM_IRQS; irq++) {
+            if (active_irqs & (LPC_HC_IRQ_SERIRQ0 >> irq)) {
+                serirq_out[lpc->irq_to_serirq_route[irq]] = true;
+            }
+        }
+
+        qemu_set_irq(lpc->psi_irq_serirq[0], serirq_out[0]);
+        qemu_set_irq(lpc->psi_irq_serirq[1], serirq_out[1]);
+        qemu_set_irq(lpc->psi_irq_serirq[2], serirq_out[2]);
+        qemu_set_irq(lpc->psi_irq_serirq[3], serirq_out[3]);
+    }
 }
 
 static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size)
@@ -505,7 +564,14 @@
         pnv_lpc_eval_irqs(lpc);
         break;
     case LPC_HC_IRQSTAT:
-        lpc->lpc_hc_irqstat &= ~val;
+        /*
+         * This register is write-to-clear for the IRQSER (LPC device IRQ)
+         * status. However, if the device has not de-asserted its interrupt,
+         * that will just raise this IRQ status bit again. Model this by
+         * keeping track of the inputs and only clearing if the inputs are
+         * deasserted.
+         */
+        lpc->lpc_hc_irqstat &= ~(val & ~lpc->lpc_hc_irq_inputs);
         pnv_lpc_eval_irqs(lpc);
         break;
     case LPC_HC_ERROR_ADDRESS:
@@ -536,10 +602,10 @@
     uint64_t val = 0xfffffffffffffffful;
 
     switch (addr) {
-    case OPB_MASTER_LS_ROUTE0: /* TODO */
+    case OPB_MASTER_LS_ROUTE0:
         val = lpc->opb_irq_route0;
         break;
-    case OPB_MASTER_LS_ROUTE1: /* TODO */
+    case OPB_MASTER_LS_ROUTE1:
         val = lpc->opb_irq_route1;
         break;
     case OPB_MASTER_LS_IRQ_STAT:
@@ -568,11 +634,15 @@
     PnvLpcController *lpc = opaque;
 
     switch (addr) {
-    case OPB_MASTER_LS_ROUTE0: /* TODO */
+    case OPB_MASTER_LS_ROUTE0:
         lpc->opb_irq_route0 = val;
+        pnv_lpc_eval_serirq_routes(lpc);
+        pnv_lpc_eval_irqs(lpc);
         break;
-    case OPB_MASTER_LS_ROUTE1: /* TODO */
+    case OPB_MASTER_LS_ROUTE1:
         lpc->opb_irq_route1 = val;
+        pnv_lpc_eval_serirq_routes(lpc);
+        pnv_lpc_eval_irqs(lpc);
         break;
     case OPB_MASTER_LS_IRQ_STAT:
         lpc->opb_irq_stat &= ~val;
@@ -657,6 +727,8 @@
     PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev);
     Error *local_err = NULL;
 
+    object_property_set_bool(OBJECT(lpc), "psi-serirq", true, &error_abort);
+
     plc->parent_realize(dev, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -666,6 +738,9 @@
     /* P9 uses a MMIO region */
     memory_region_init_io(&lpc->xscom_regs, OBJECT(lpc), &pnv_lpc_mmio_ops,
                           lpc, "lpcm", PNV9_LPCM_SIZE);
+
+    /* P9 LPC routes ISA irqs to 4 PSI SERIRQ lines */
+    qdev_init_gpio_out_named(dev, lpc->psi_irq_serirq, "SERIRQ", 4);
 }
 
 static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data)
@@ -744,13 +819,19 @@
     memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR,
                                 &lpc->lpc_hc_regs);
 
-    qdev_init_gpio_out(dev, &lpc->psi_irq, 1);
+    qdev_init_gpio_out_named(dev, &lpc->psi_irq_lpchc, "LPCHC", 1);
 }
 
+static Property pnv_lpc_properties[] = {
+    DEFINE_PROP_BOOL("psi-serirq", PnvLpcController, psi_has_serirq, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void pnv_lpc_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
+    device_class_set_props(dc, pnv_lpc_properties);
     dc->realize = pnv_lpc_realize;
     dc->desc = "PowerNV LPC Controller";
     dc->user_creatable = false;
@@ -796,18 +877,34 @@
     }
 
     if (pnv->cpld_irqstate != old_state) {
-        qemu_set_irq(lpc->psi_irq, pnv->cpld_irqstate != 0);
+        qemu_set_irq(lpc->psi_irq_lpchc, pnv->cpld_irqstate != 0);
     }
 }
 
 static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level)
 {
     PnvLpcController *lpc = PNV_LPC(opaque);
+    uint32_t irq_bit = LPC_HC_IRQ_SERIRQ0 >> n;
 
-    /* The Naples HW latches the 1 levels, clearing is done by SW */
     if (level) {
-        lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n;
+        lpc->lpc_hc_irq_inputs |= irq_bit;
+
+        /*
+         * The LPC HC in Naples and later latches LPC IRQ into a bit field in
+         * the IRQSTAT register, and that drives the PSI IRQ to the IC.
+         * Software clears this bit manually (see LPC_HC_IRQSTAT handler).
+         */
+        lpc->lpc_hc_irqstat |= irq_bit;
         pnv_lpc_eval_irqs(lpc);
+    } else {
+        lpc->lpc_hc_irq_inputs &= ~irq_bit;
+
+        /* POWER9 adds an auto-clear mode that clears IRQSTAT bits on EOI */
+        if (lpc->psi_has_serirq &&
+            (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_AUTO_CLEAR)) {
+            lpc->lpc_hc_irqstat &= ~irq_bit;
+            pnv_lpc_eval_irqs(lpc);
+        }
     }
 }
 
@@ -838,6 +935,7 @@
         handler = pnv_lpc_isa_irq_handler;
     }
 
+    /* POWER has a 17th irq; QEMU only implements the 16 regular device irqs */
     irqs = qemu_allocate_irqs(handler, lpc, ISA_NUM_IRQS);
 
     isa_bus_register_input_irqs(isa_bus, irqs);
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
index a17816d..d192bbe 100644
--- a/hw/ppc/pnv_xscom.c
+++ b/hw/ppc/pnv_xscom.c
@@ -75,11 +75,6 @@
     case PRD_P9_IPOLL_REG_MASK:
     case PRD_P9_IPOLL_REG_STATUS:
 
-        /* P9 xscom reset */
-    case 0x0090018:     /* Receive status reg */
-    case 0x0090012:     /* log register */
-    case 0x0090013:     /* error register */
-
         /* P8 xscom reset */
     case 0x2020007:     /* ADU stuff, log register */
     case 0x2020009:     /* ADU stuff, error register */
@@ -119,10 +114,6 @@
     case 0x1010c03:     /* PIBAM FIR MASK */
     case 0x1010c04:     /* PIBAM FIR MASK */
     case 0x1010c05:     /* PIBAM FIR MASK */
-        /* P9 xscom reset */
-    case 0x0090018:     /* Receive status reg */
-    case 0x0090012:     /* log register */
-    case 0x0090013:     /* error register */
 
         /* P8 xscom reset */
     case 0x2020007:     /* ADU stuff, log register */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 98fa3aa..370d7c3 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2195,6 +2195,7 @@
         &vmstate_spapr_cap_fwnmi,
         &vmstate_spapr_fwnmi,
         &vmstate_spapr_cap_rpt_invalidate,
+        &vmstate_spapr_cap_ail_mode_3,
         &vmstate_spapr_cap_nested_papr,
         NULL
     }
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 0a15415..2f74923 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -974,6 +974,7 @@
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
 SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
 SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE);
+SPAPR_CAP_MIG_STATE(ail_mode_3, SPAPR_CAP_AIL_MODE_3);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index e7c9edd..56090ab 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -300,11 +300,13 @@
     g_autofree char *id = NULL;
     CPUState *cs;
     PowerPCCPU *cpu;
+    CPUPPCState *env;
 
     obj = object_new(scc->cpu_type);
 
     cs = CPU(obj);
     cpu = POWERPC_CPU(obj);
+    env = &cpu->env;
     /*
      * All CPUs start halted. CPU0 is unhalted from the machine level reset code
      * and the rest are explicitly started up by the guest using an RTAS call.
@@ -315,6 +317,8 @@
         return NULL;
     }
 
+    env->core_index = cc->core_id;
+
     cpu->node_id = sc->node_id;
 
     id = g_strdup_printf("thread[%d]", i);
@@ -345,9 +349,15 @@
     qemu_register_reset(spapr_cpu_core_reset_handler, sc);
     sc->threads = g_new0(PowerPCCPU *, cc->nr_threads);
     for (i = 0; i < cc->nr_threads; i++) {
-        sc->threads[i] = spapr_create_vcpu(sc, i, errp);
-        if (!sc->threads[i] ||
-            !spapr_realize_vcpu(sc->threads[i], spapr, sc, i, errp)) {
+        PowerPCCPU *cpu;
+
+        cpu = spapr_create_vcpu(sc, i, errp);
+        sc->threads[i] = cpu;
+        if (cpu && cc->nr_threads > 1) {
+            cpu->env.has_smt_siblings = true;
+        }
+
+        if (!cpu || !spapr_realize_vcpu(cpu, spapr, sc, i, errp)) {
             spapr_cpu_core_unrealize(dev);
             return;
         }
diff --git a/hw/ppc/spapr_vhyp_mmu.c b/hw/ppc/spapr_vhyp_mmu.c
index b3dd8b3..2d41d7f 100644
--- a/hw/ppc/spapr_vhyp_mmu.c
+++ b/hw/ppc/spapr_vhyp_mmu.c
@@ -15,19 +15,6 @@
 #include "helper_regs.h"
 #include "hw/ppc/spapr.h"
 #include "mmu-hash64.h"
-#include "mmu-book3s-v3.h"
-
-
-static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
-{
-    /*
-     * hash value/pteg group index is normalized by HPT mask
-     */
-    if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
-        return false;
-    }
-    return true;
-}
 
 static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
@@ -70,7 +57,7 @@
 
     pteh &= ~0x60ULL;
 
-    if (!valid_ptex(cpu, ptex)) {
+    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
@@ -119,7 +106,7 @@
     const ppc_hash_pte64_t *hptes;
     target_ulong v, r;
 
-    if (!valid_ptex(cpu, ptex)) {
+    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
         return REMOVE_PARM;
     }
 
@@ -250,7 +237,7 @@
     const ppc_hash_pte64_t *hptes;
     target_ulong v, r;
 
-    if (!valid_ptex(cpu, ptex)) {
+    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
@@ -287,7 +274,7 @@
     int i, ridx, n_entries = 1;
     const ppc_hash_pte64_t *hptes;
 
-    if (!valid_ptex(cpu, ptex)) {
+    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c
index 09f29be..c02eaac 100644
--- a/hw/ppc/spapr_vof.c
+++ b/hw/ppc/spapr_vof.c
@@ -28,7 +28,7 @@
 
 void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt)
 {
-    char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
+    g_autofree char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
 
     vof_build_dt(fdt, spapr->vof);
 
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index bf29bbf..1f125ce 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -95,6 +95,10 @@
 vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
 vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
 
+# pnv_adu.c
+pnv_adu_xscom_read(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64
+pnv_adu_xscom_write(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64
+
 # pnv_chiptod.c
 pnv_chiptod_xscom_read(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64
 pnv_chiptod_xscom_write(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
index e3b430a..b5b6514 100644
--- a/hw/ppc/vof.c
+++ b/hw/ppc/vof.c
@@ -646,7 +646,7 @@
     mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen);
     g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc));
     if (sc == 2) {
-        mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac));
+        mem0_end = ldq_be_p(mem0_reg + sizeof(uint32_t) * ac);
     } else {
         mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac));
     }
diff --git a/hw/ssi/Kconfig b/hw/ssi/Kconfig
index 83ee53c..8d180de 100644
--- a/hw/ssi/Kconfig
+++ b/hw/ssi/Kconfig
@@ -24,3 +24,7 @@
 config BCM2835_SPI
     bool
     select SSI
+
+config PNV_SPI
+    bool
+    select SSI
diff --git a/hw/ssi/meson.build b/hw/ssi/meson.build
index b999aeb..b7ad7fc 100644
--- a/hw/ssi/meson.build
+++ b/hw/ssi/meson.build
@@ -12,3 +12,4 @@
 system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_spi.c'))
 system_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_spi_host.c'))
 system_ss.add(when: 'CONFIG_BCM2835_SPI', if_true: files('bcm2835_spi.c'))
+system_ss.add(when: 'CONFIG_PNV_SPI', if_true: files('pnv_spi.c'))
diff --git a/hw/ssi/pnv_spi.c b/hw/ssi/pnv_spi.c
new file mode 100644
index 0000000..c1297ab
--- /dev/null
+++ b/hw/ssi/pnv_spi.c
@@ -0,0 +1,1268 @@
+/*
+ * QEMU PowerPC SPI model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/qdev-properties.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ssi/pnv_spi.h"
+#include "hw/ssi/pnv_spi_regs.h"
+#include "hw/ssi/ssi.h"
+#include <libfdt.h>
+#include "hw/irq.h"
+#include "trace.h"
+
+#define PNV_SPI_OPCODE_LO_NIBBLE(x) ((x) & 0x0F)
+#define PNV_SPI_MASKED_OPCODE(x) ((x) & 0xF0)
+
+/*
+ * Macro from include/hw/ppc/fdt.h
+ * fdt.h cannot be included here as it contains ppc target specific dependencies.
+ */
+#define _FDT(exp)                                                  \
+    do {                                                           \
+        int _ret = (exp);                                          \
+        if (_ret < 0) {                                            \
+            qemu_log_mask(LOG_GUEST_ERROR,                         \
+                    "error creating device tree: %s: %s",          \
+                    #exp, fdt_strerror(_ret));                     \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+/* PnvXferBuffer: a variable-length transfer payload */
+typedef struct PnvXferBuffer {
+    uint32_t    len;
+    uint8_t    *data;
+} PnvXferBuffer;
+
+/* pnv_spi_xfer_buffer_methods */
+static PnvXferBuffer *pnv_spi_xfer_buffer_new(void)
+{
+    PnvXferBuffer *payload = g_malloc0(sizeof(*payload));
+
+    return payload;
+}
+
+static void pnv_spi_xfer_buffer_free(PnvXferBuffer *payload)
+{
+    /* data is allocated with g_malloc/g_realloc, so free with g_free */
+    g_free(payload->data);
+    g_free(payload);
+}
+
+static uint8_t *pnv_spi_xfer_buffer_write_ptr(PnvXferBuffer *payload,
+                uint32_t offset, uint32_t length)
+{
+    if (payload->len < (offset + length)) {
+        payload->len = offset + length;
+        payload->data = g_realloc(payload->data, payload->len);
+    }
+    return &payload->data[offset];
+}
+
+static bool does_rdr_match(PnvSpi *s)
+{
+    /*
+     * According to the spec, the mask bits that are 0 are compared and the
+     * bits that are 1 are ignored.
+     */
+    uint16_t rdr_match_mask = GETFIELD(SPI_MM_RDR_MATCH_MASK,
+                                        s->regs[SPI_MM_REG]);
+    uint16_t rdr_match_val = GETFIELD(SPI_MM_RDR_MATCH_VAL,
+                                        s->regs[SPI_MM_REG]);
+
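+    /*
+     * e.g. a mask of 0xff00 compares only the low byte: val 0x0012 matches
+     * when the low byte of the received data is 0x12.
+     */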
+    if ((~rdr_match_mask & rdr_match_val) == ((~rdr_match_mask) &
+            GETFIELD(PPC_BITMASK(48, 63), s->regs[SPI_RCV_DATA_REG]))) {
+        return true;
+    }
+    return false;
+}
+
+static uint8_t get_from_offset(PnvSpi *s, uint8_t offset)
+{
+    uint8_t byte;
+
+    /*
+     * Offset is an index between 0 and PNV_SPI_REG_SIZE - 1
+     * Check the offset before using it.
+     */
+    if (offset < PNV_SPI_REG_SIZE) {
+        byte = (s->regs[SPI_XMIT_DATA_REG] >> (56 - offset * 8)) & 0xFF;
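+        /* e.g. offset 0 selects the most-significant byte of the TDR */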
+    } else {
+        /*
+         * Log an error and return a 0xFF since we have to assign something
+         * to byte before returning.
+         */
+        qemu_log_mask(LOG_GUEST_ERROR, "Invalid offset = %d used to get byte "
+                      "from TDR\n", offset);
+        byte = 0xff;
+    }
+    return byte;
+}
+
+static uint8_t read_from_frame(PnvSpi *s, uint8_t *read_buf, uint8_t nr_bytes,
+                uint8_t ecc_count, uint8_t shift_in_count)
+{
+    uint8_t byte;
+    int count = 0;
+
+    while (count < nr_bytes) {
+        shift_in_count++;
+        if ((ecc_count != 0) &&
+            (shift_in_count == (PNV_SPI_REG_SIZE + ecc_count))) {
+            shift_in_count = 0;
+        } else {
+            byte = read_buf[count];
+            trace_pnv_spi_shift_rx(byte, count);
+            s->regs[SPI_RCV_DATA_REG] = (s->regs[SPI_RCV_DATA_REG] << 8) | byte;
+        }
+        count++;
+    } /* end of while */
+    return shift_in_count;
+}
+
+static void spi_response(PnvSpi *s, int bits, PnvXferBuffer *rsp_payload)
+{
+    uint8_t ecc_count;
+    uint8_t shift_in_count;
+
+    /*
+     * Processing here must handle:
+     * - Which bytes in the payload we should move to the RDR
+     * - Explicit mode counter configuration settings
+     * - RDR full and RDR overrun status
+     */
+
+    /*
+     * First check that the response payload is the exact same
+     * number of bytes as the request payload was
+     */
+    if (rsp_payload->len != (s->N1_bytes + s->N2_bytes)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Invalid response payload size in "
+                       "bytes, expected %d, got %d\n",
+                       (s->N1_bytes + s->N2_bytes), rsp_payload->len);
+    } else {
+        uint8_t ecc_control;
+        trace_pnv_spi_rx_received(rsp_payload->len);
+        trace_pnv_spi_log_Ncounts(s->N1_bits, s->N1_bytes, s->N1_tx,
+                        s->N1_rx, s->N2_bits, s->N2_bytes, s->N2_tx, s->N2_rx);
+        /*
+         * Adding an ECC count lets us know when we have found a payload byte
+         * that was shifted in but cannot be loaded into RDR.  Bits 29-30 of
+         * clock_config_reset_control register equal to either 0b00 or 0b10
+         * indicate that we are taking in data with ECC and either applying
+         * the ECC or discarding it.
+         */
+        ecc_count = 0;
+        ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL, s->regs[SPI_CLK_CFG_REG]);
+        if (ecc_control == 0 || ecc_control == 2) {
+            ecc_count = 1;
+        }
+        /*
+         * Use the N1_rx and N2_rx counts to control shifting data from the
+         * payload into the RDR.  Keep an overall count of the number of bytes
+         * shifted into RDR so we can discard every 9th byte when ECC is
+         * enabled.
+         */
+        shift_in_count = 0;
+        /* Handle the N1 portion of the frame first */
+        if (s->N1_rx != 0) {
+            trace_pnv_spi_rx_read_N1frame();
+            shift_in_count = read_from_frame(s, &rsp_payload->data[0],
+                            s->N1_bytes, ecc_count, shift_in_count);
+        }
+        /* Handle the N2 portion of the frame */
+        if (s->N2_rx != 0) {
+            trace_pnv_spi_rx_read_N2frame();
+            shift_in_count = read_from_frame(s,
+                            &rsp_payload->data[s->N1_bytes], s->N2_bytes,
+                            ecc_count, shift_in_count);
+        }
+        if ((s->N1_rx + s->N2_rx) > 0) {
+            /*
+             * Data was received so handle RDR status.
+             * It is easier to handle RDR_full and RDR_overrun status here
+             * since the RDR register's shift_byte_in method is called
+             * multiple times in a row. Controlling RDR status is done here
+             * instead of in the RDR scoped methods for that reason.
+             */
+            if (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1) {
+                /*
+                 * Data was shifted into the RDR before having been read
+                 * causing previous data to have been overrun.
+                 */
+                s->status = SETFIELD(SPI_STS_RDR_OVERRUN, s->status, 1);
+            } else {
+                /*
+                 * Set status to indicate that the received data register is
+                 * full. This flag is only cleared once the RDR is unloaded.
+                 */
+                s->status = SETFIELD(SPI_STS_RDR_FULL, s->status, 1);
+            }
+        }
+    } /* end of else */
+} /* end of spi_response() */
+
+static void transfer(PnvSpi *s, PnvXferBuffer *payload)
+{
+    uint32_t tx;
+    uint32_t rx;
+    PnvXferBuffer *rsp_payload = NULL;
+
+    rsp_payload = pnv_spi_xfer_buffer_new();
+    for (int offset = 0; offset < payload->len; offset += s->transfer_len) {
+        tx = 0;
+        for (int i = 0; i < s->transfer_len; i++) {
+            if ((offset + i) >= payload->len) {
+                tx <<= 8;
+            } else {
+                tx = (tx << 8) | payload->data[offset + i];
+            }
+        }
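+        /* tx now holds transfer_len payload bytes, MSB first, zero-padded */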
+        rx = ssi_transfer(s->ssi_bus, tx);
+        for (int i = 0; i < s->transfer_len; i++) {
+            if ((offset + i) >= payload->len) {
+                break;
+            }
+            *(pnv_spi_xfer_buffer_write_ptr(rsp_payload, rsp_payload->len, 1)) =
+                    (rx >> (8 * (s->transfer_len - 1) - i * 8)) & 0xFF;
+        }
+    }
+    if (rsp_payload != NULL) {
+        spi_response(s, s->N1_bits, rsp_payload);
+        pnv_spi_xfer_buffer_free(rsp_payload);
+    }
+}
+
+static inline uint8_t get_seq_index(PnvSpi *s)
+{
+    return GETFIELD(SPI_STS_SEQ_INDEX, s->status);
+}
+
+static inline void next_sequencer_fsm(PnvSpi *s)
+{
+    uint8_t seq_index = get_seq_index(s);
+    s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, (seq_index + 1));
+    s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_INDEX_INCREMENT);
+}
+
+/*
+ * Calculate the N1 counters based on passed in opcode and
+ * internal register values.
+ * The method assumes that the opcode is a Shift_N1 opcode
+ * and doesn't test it.
+ * The counters returned are:
+ * N1_bits: Number of bits in the payload data that are significant
+ * to the responder.
+ * N1_bytes: Total count of payload bytes for the N1 (portion of the) frame.
+ * N1_tx: Total number of bytes taken from TDR for N1
+ * N1_rx: Total number of bytes taken from the payload for N1
+ */
+static void calculate_N1(PnvSpi *s, uint8_t opcode)
+{
+    /*
+     * Shift_N1 opcode form: 0x3M
+     * Implicit mode:
+     * If M != 0 the shift count is M bytes and M is the number of tx bytes.
+     * Forced Implicit mode:
+     * M is the shift count but tx and rx are determined by the count control
+     * register fields.  Note that we only check for Forced Implicit mode when
+     * M != 0 since the mode doesn't make sense when M = 0.
+     * Explicit mode:
+     * If M == 0 then the shift count is the number of bits defined in the
+     * Counter Configuration Register's shift_count_N1 field.
+     */
+    if (PNV_SPI_OPCODE_LO_NIBBLE(opcode) == 0) {
+        /* Explicit mode */
+        s->N1_bits = GETFIELD(SPI_CTR_CFG_N1, s->regs[SPI_CTR_CFG_REG]);
+        s->N1_bytes = (s->N1_bits + 7) / 8;
+        s->N1_tx = 0;
+        s->N1_rx = 0;
+        /* If tx count control for N1 is set, load the tx value */
+        if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B2, s->regs[SPI_CTR_CFG_REG]) == 1) {
+            s->N1_tx = s->N1_bytes;
+        }
+        /* If rx count control for N1 is set, load the rx value */
+        if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B3, s->regs[SPI_CTR_CFG_REG]) == 1) {
+            s->N1_rx = s->N1_bytes;
+        }
+    } else {
+        /* Implicit mode/Forced Implicit mode, use M field from opcode */
+        s->N1_bytes = PNV_SPI_OPCODE_LO_NIBBLE(opcode);
+        s->N1_bits = s->N1_bytes * 8;
+        /*
+         * Assume that we are going to transmit the count
+         * (pure Implicit only)
+         */
+        s->N1_tx = s->N1_bytes;
+        s->N1_rx = 0;
+        /* Let Forced Implicit mode have an effect on the counts */
+        if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B1, s->regs[SPI_CTR_CFG_REG]) == 1) {
+            /*
+             * If Forced Implicit mode and count control doesn't
+             * indicate transmit then reset the tx count to 0
+             */
+            if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B2,
+                                    s->regs[SPI_CTR_CFG_REG]) == 0) {
+                s->N1_tx = 0;
+            }
+            /* If rx count control for N1 is set, load the rx value */
+            if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B3,
+                                    s->regs[SPI_CTR_CFG_REG]) == 1) {
+                s->N1_rx = s->N1_bytes;
+            }
+        }
+    }
+    /*
+     * Enforce an upper limit on the size of N1 equal to the known size of
+     * the shift register, 64 bits, or 72 bits if ECC is enabled.
+     * If the size exceeds that limit it is a user error, so log a guest
+     * error and cap the size at the 64-bit or 72-bit maximum.
+     */
+    uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL,
+                                   s->regs[SPI_CLK_CFG_REG]);
+    if (ecc_control == 0 || ecc_control == 2) {
+        if (s->N1_bytes > (PNV_SPI_REG_SIZE + 1)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N1 shift size when "
+                          "ECC enabled, bytes = 0x%x, bits = 0x%x\n",
+                          s->N1_bytes, s->N1_bits);
+            s->N1_bytes = PNV_SPI_REG_SIZE + 1;
+            s->N1_bits = s->N1_bytes * 8;
+        }
+    } else if (s->N1_bytes > PNV_SPI_REG_SIZE) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N1 shift size, "
+                      "bytes = 0x%x, bits = 0x%x\n",
+                      s->N1_bytes, s->N1_bits);
+        s->N1_bytes = PNV_SPI_REG_SIZE;
+        s->N1_bits = s->N1_bytes * 8;
+    }
+} /* end of calculate_N1 */
+
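+/*
+ * Worked example (illustrative): a Shift_N1 opcode of 0x34 selects
+ * Implicit mode with M = 4, so calculate_N1() yields N1_bytes = 4,
+ * N1_bits = 32, N1_tx = 4 and N1_rx = 0, unless the Forced Implicit
+ * mode bit overrides the tx/rx counts from the count control fields.
+ * An opcode of 0x30 selects Explicit mode instead, where the counts
+ * come from the Counter Configuration Register.
+ */
+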
+/*
+ * Shift_N1 operation handler method
+ */
+static bool operation_shiftn1(PnvSpi *s, uint8_t opcode,
+                       PnvXferBuffer **payload, bool send_n1_alone)
+{
+    uint8_t n1_count;
+    bool stop = false;
+
+    /*
+     * If there isn't a current payload left over from a stopped sequence
+     * create a new one.
+     */
+    if (*payload == NULL) {
+        *payload = pnv_spi_xfer_buffer_new();
+    }
+    /*
+     * Use a combination of N1 counters to build the N1 portion of the
+     * transmit payload.
+     * We only care about transmit at this time since the request payload
+     * only represents data going out on the controller output line.
+     * Leave mode specific considerations in the calculate function since
+     * all we really care about are counters that tell us exactly how
+     * many bytes are in the payload and how many of those bytes to
+     * include from the TDR into the payload.
+     */
+    calculate_N1(s, opcode);
+    trace_pnv_spi_log_Ncounts(s->N1_bits, s->N1_bytes, s->N1_tx,
+                    s->N1_rx, s->N2_bits, s->N2_bytes, s->N2_tx, s->N2_rx);
+    /*
+     * Zero out the N2 counters here in case there is no N2 operation following
+     * the N1 operation in the sequencer.  This keeps leftover N2 information
+     * from interfering with spi_response logic.
+     */
+    s->N2_bits = 0;
+    s->N2_bytes = 0;
+    s->N2_tx = 0;
+    s->N2_rx = 0;
+    /*
+     * N1_bytes is the overall size of the N1 portion of the frame regardless of
+     * whether N1 is used for tx, rx or both.  Loop over the size to build a
+     * payload that is N1_bytes long.
+     * N1_tx is the count of bytes to take from the TDR and "shift" into the
+     * frame which means append those bytes to the payload for the N1 portion
+     * of the frame.
+     * If N1_tx is 0 or if the count exceeds the size of the TDR append 0xFF to
+     * the frame until the overall N1 count is reached.
+     */
+    n1_count = 0;
+    while (n1_count < s->N1_bytes) {
+        /*
+         * Assuming that if N1_tx is not equal to 0 then it is the same as
+         * N1_bytes.
+         */
+        if ((s->N1_tx != 0) && (n1_count < PNV_SPI_REG_SIZE)) {
+            if (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1) {
+                /*
+                 * Note that we are only appending to the payload IF the TDR
+                 * is full otherwise we don't touch the payload because we are
+                 * going to NOT send the payload and instead tell the sequencer
+                 * that called us to stop and wait for a TDR write so we have
+                 * data to load into the payload.
+                 */
+                uint8_t n1_byte = 0x00;
+                n1_byte = get_from_offset(s, n1_count);
+                trace_pnv_spi_tx_append("n1_byte", n1_byte, n1_count);
+                *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1)) =
+                        n1_byte;
+            } else {
+                /*
+                 * We hit a shift_n1 opcode TX but the TDR is empty, tell the
+                 * sequencer to stop and break this loop.
+                 */
+                trace_pnv_spi_sequencer_stop_requested("Shift N1"
+                                "set for transmit but TDR is empty");
+                stop = true;
+                break;
+            }
+        } else {
+            /*
+             * Cases here:
+             * - we are receiving during the N1 frame segment and the RDR
+             *   is full so we need to stop until the RDR is read
+             * - we are transmitting and we don't care about RDR status
+             *   since we won't be loading RDR during the frame segment.
+             * - we are receiving and the RDR is empty so we allow the operation
+             *   to proceed.
+             */
+            if ((s->N1_rx != 0) && (GETFIELD(SPI_STS_RDR_FULL,
+                                           s->status) == 1)) {
+                trace_pnv_spi_sequencer_stop_requested("shift N1"
+                                "set for receive but RDR is full");
+                stop = true;
+                break;
+            } else {
+                trace_pnv_spi_tx_append_FF("n1_byte");
+                *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1))
+                        = 0xff;
+            }
+        }
+        n1_count++;
+    } /* end of while */
+    /*
+     * If we are not stopping due to an empty TDR and we are doing an N1 TX
+     * and the TDR is full we need to clear the TDR_full status.
+     * Do this here instead of up in the loop above so we don't log the message
+     * in every loop iteration.
+     * Ignore the send_n1_alone flag, all that does is defer the TX until the N2
+     * operation, which was found immediately after the current opcode.  The TDR
+     * was unloaded and will be shifted so we have to clear the TDR_full status.
+     */
+    if (!stop && (s->N1_tx != 0) &&
+        (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1)) {
+        s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 0);
+    }
+    /*
+     * There are other reasons why the shifter would stop, such as a TDR empty
+     * or RDR full condition with N1 set to receive.  If we haven't stopped due
+     * to either one of those conditions then check if the send_n1_alone flag is
+     * equal to false, indicating the next opcode is an N2 operation, AND if
+     * the N2 counter reload switch (bit 0 of the N2 count control field) is
+     * set.  This condition requires a pacing write to "kick" off the N2
+     * shift which includes the N1 shift as well when send_n1_alone is False.
+     */
+    if (!stop && !send_n1_alone &&
+       (GETFIELD(SPI_CTR_CFG_N2_CTRL_B0, s->regs[SPI_CTR_CFG_REG]) == 1)) {
+        trace_pnv_spi_sequencer_stop_requested("N2 counter reload "
+                        "active, stop N1 shift, TDR_underrun set to 1");
+        stop = true;
+        s->status = SETFIELD(SPI_STS_TDR_UNDERRUN, s->status, 1);
+    }
+    /*
+     * If send_n1_alone is set AND we have a full TDR then this is the first and
+     * last payload to send and we don't have an N2 frame segment to add to the
+     * payload.
+     */
+    if (send_n1_alone && !stop) {
+        /* We have a TX and a full TDR or an RX and an empty RDR */
+        trace_pnv_spi_tx_request("Shifting N1 frame", (*payload)->len);
+        transfer(s, *payload);
+        /* The N1 frame shift is complete so reset the N1 counters */
+        s->N1_bits = 0;
+        s->N1_bytes = 0;
+        s->N1_tx = 0;
+        s->N1_rx = 0;
+        pnv_spi_xfer_buffer_free(*payload);
+        *payload = NULL;
+    }
+    return stop;
+} /* end of operation_shiftn1() */
+
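+/*
+ * Illustrative example (assuming get_from_offset() returns TDR bytes
+ * most-significant byte first): a Shift_N1 with N1_bytes = 2 and
+ * N1_tx = 2 while the TDR is full and holds 0xAABB... builds the
+ * payload {0xAA, 0xBB}; the same shift with N1_tx = 0 instead pads
+ * with 0xFF, building {0xFF, 0xFF} so the frame length still matches
+ * N1_bytes.
+ */
+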
+/*
+ * Calculate the N2 counters based on the passed-in opcode and
+ * internal register values.
+ * The method assumes that the opcode is a Shift_N2 opcode
+ * and doesn't test it.
+ * The counters returned are:
+ * N2_bits: Number of bits in the payload data that are significant
+ * to the responder.
+ * N2_bytes: Total count of payload bytes for the N2 frame.
+ * N2_tx: Total number of bytes taken from the TDR for N2.
+ * N2_rx: Total number of bytes taken from the payload for N2.
+ */
+static void calculate_N2(PnvSpi *s, uint8_t opcode)
+{
+    /*
+     * Shift_N2 opcode form: 0x4M
+     * Implicit mode:
+     * If M != 0 the shift count is M bytes and M is the number of rx bytes.
+     * Forced Implicit mode:
+     * M is the shift count but tx and rx are determined by the count control
+     * register fields.  Note that we only check for Forced Implicit mode when
+     * M != 0 since the mode doesn't make sense when M = 0.
+     * Explicit mode:
+     * If M == 0 then the shift count is the number of bits defined in the
+     * Counter Configuration Register's shift_count_N2 field.
+     */
+    if (PNV_SPI_OPCODE_LO_NIBBLE(opcode) == 0) {
+        /* Explicit mode */
+        s->N2_bits = GETFIELD(SPI_CTR_CFG_N2, s->regs[SPI_CTR_CFG_REG]);
+        s->N2_bytes = (s->N2_bits + 7) / 8;
+        s->N2_tx = 0;
+        s->N2_rx = 0;
+        /* If tx count control for N2 is set, load the tx value */
+        if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B2, s->regs[SPI_CTR_CFG_REG]) == 1) {
+            s->N2_tx = s->N2_bytes;
+        }
+        /* If rx count control for N2 is set, load the rx value */
+        if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B3, s->regs[SPI_CTR_CFG_REG]) == 1) {
+            s->N2_rx = s->N2_bytes;
+        }
+    } else {
+        /* Implicit mode/Forced Implicit mode, use M field from opcode */
+        s->N2_bytes = PNV_SPI_OPCODE_LO_NIBBLE(opcode);
+        s->N2_bits = s->N2_bytes * 8;
+        /* Assume that we are going to receive the count */
+        s->N2_rx = s->N2_bytes;
+        s->N2_tx = 0;
+        /* Let Forced Implicit mode have an effect on the counts */
+        if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B1, s->regs[SPI_CTR_CFG_REG]) == 1) {
+            /*
+             * If Forced Implicit mode and count control doesn't
+             * indicate a receive then reset the rx count to 0
+             */
+            if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B3,
+                                    s->regs[SPI_CTR_CFG_REG]) == 0) {
+                s->N2_rx = 0;
+            }
+            /* If tx count control for N2 is set, load the tx value */
+            if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B2,
+                                    s->regs[SPI_CTR_CFG_REG]) == 1) {
+                s->N2_tx = s->N2_bytes;
+            }
+        }
+    }
+    /*
+     * Enforce an upper limit on the size of N2 equal to the known size of
+     * the shift register, 64 bits, or 72 bits if ECC is enabled.
+     * If the size exceeds that limit it is a user error, so log a guest
+     * error and cap the size at the 64-bit or 72-bit maximum.
+     */
+    uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL,
+                    s->regs[SPI_CLK_CFG_REG]);
+    if (ecc_control == 0 || ecc_control == 2) {
+        if (s->N2_bytes > (PNV_SPI_REG_SIZE + 1)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N2 shift size when "
+                          "ECC enabled, bytes = 0x%x, bits = 0x%x\n",
+                          s->N2_bytes, s->N2_bits);
+            s->N2_bytes = PNV_SPI_REG_SIZE + 1;
+            s->N2_bits = s->N2_bytes * 8;
+        }
+    } else if (s->N2_bytes > PNV_SPI_REG_SIZE) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N2 shift size, "
+                      "bytes = 0x%x, bits = 0x%x\n",
+                      s->N2_bytes, s->N2_bits);
+        s->N2_bytes = PNV_SPI_REG_SIZE;
+        s->N2_bits = s->N2_bytes * 8;
+    }
+} /* end of calculate_N2 */
+
+/*
+ * Shift_N2 operation handler method
+ */
+static bool operation_shiftn2(PnvSpi *s, uint8_t opcode,
+                       PnvXferBuffer **payload)
+{
+    uint8_t n2_count;
+    bool stop = false;
+
+    /*
+     * If there isn't a current payload left over from a stopped sequence
+     * create a new one.
+     */
+    if (*payload == NULL) {
+        *payload = pnv_spi_xfer_buffer_new();
+    }
+    /*
+     * Use a combination of N2 counters to build the N2 portion of the
+     * transmit payload.
+     */
+    calculate_N2(s, opcode);
+    trace_pnv_spi_log_Ncounts(s->N1_bits, s->N1_bytes, s->N1_tx,
+                    s->N1_rx, s->N2_bits, s->N2_bytes, s->N2_tx, s->N2_rx);
+    /*
+     * The only difference between this code and the code for shift N1 is
+     * that this code has to account for the possible presence of N1 transmit
+     * bytes already taken from the TDR.
+     * If there are bytes to be transmitted for the N2 portion of the frame
+     * and there are still bytes in TDR that have not been copied into the
+     * TX data of the payload, this code will handle transmitting those
+     * remaining bytes.
+     * If for some reason the transmit count(s) add up to more than the size
+     * of the TDR we will just append 0xFF to the transmit payload data until
+     * the payload is N1 + N2 bytes long.
+     */
+    n2_count = 0;
+    while (n2_count < s->N2_bytes) {
+        /*
+         * If the RDR is full and we need to RX just bail out, letting the
+         * code continue will end up building the payload twice in the same
+         * buffer since RDR full causes a sequence stop and restart.
+         */
+        if ((s->N2_rx != 0) &&
+            (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1)) {
+            trace_pnv_spi_sequencer_stop_requested("shift N2 set"
+                            "for receive but RDR is full");
+            stop = true;
+            break;
+        }
+        if ((s->N2_tx != 0) && ((s->N1_tx + n2_count) <
+                                PNV_SPI_REG_SIZE)) {
+            /* Always append data for the N2 segment if it is set for TX */
+            uint8_t n2_byte = 0x00;
+            n2_byte = get_from_offset(s, (s->N1_tx + n2_count));
+            trace_pnv_spi_tx_append("n2_byte", n2_byte, (s->N1_tx + n2_count));
+            *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1))
+                    = n2_byte;
+        } else {
+            /*
+             * Regardless of whether or not N2 is set for TX or RX, we need
+             * the number of bytes in the payload to match the overall length
+             * of the operation.
+             */
+            trace_pnv_spi_tx_append_FF("n2_byte");
+            *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1))
+                    = 0xff;
+        }
+        n2_count++;
+    } /* end of while */
+    if (!stop) {
+        /* We have a TX and a full TDR or an RX and an empty RDR */
+        trace_pnv_spi_tx_request("Shifting N2 frame", (*payload)->len);
+        transfer(s, *payload);
+        /*
+         * If we are doing an N2 TX and the TDR is full we need to clear the
+         * TDR_full status. Do this here instead of up in the loop above so we
+         * don't log the message in every loop iteration.
+         */
+        if ((s->N2_tx != 0) &&
+            (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1)) {
+            s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 0);
+        }
+        /*
+         * The N2 frame shift is complete so reset the N2 counters.
+         * Reset the N1 counters also in case the frame was a combination of
+         * N1 and N2 segments.
+         */
+        s->N2_bits = 0;
+        s->N2_bytes = 0;
+        s->N2_tx = 0;
+        s->N2_rx = 0;
+        s->N1_bits = 0;
+        s->N1_bytes = 0;
+        s->N1_tx = 0;
+        s->N1_rx = 0;
+        pnv_spi_xfer_buffer_free(*payload);
+        *payload = NULL;
+    }
+    return stop;
+} /* end of operation_shiftn2() */
+
+static void operation_sequencer(PnvSpi *s)
+{
+    /*
+     * Loop through each sequencer operation ID and perform the requested
+     * operations.
+     * send_n1_alone flags whether the N1 frame should be sent on its own
+     * or held back and combined with a following N2 frame.
+     */
+    bool send_n1_alone = true;
+    bool stop = false; /* Flag to stop the sequencer */
+    uint8_t opcode = 0;
+    uint8_t masked_opcode = 0;
+
+    /*
+     * PnvXferBuffer containing the payload of the SPI frame.
+     * This is static because there are cases where a sequence has to stop
+     * and wait for the target application to unload the RDR.  That can
+     * happen during a sequence where N1 is not sent alone but combined
+     * with N2, because the N1 tx length plus the N2 tx length fits within
+     * the size of the TDR.
+     */
+    static PnvXferBuffer *payload;
+
+    if (payload == NULL) {
+        payload = pnv_spi_xfer_buffer_new();
+    }
+    /*
+     * Clear the sequencer FSM error bit - general_SPI_status[3]
+     * before starting a sequence.
+     */
+    s->status = SETFIELD(SPI_STS_GEN_STATUS_B3, s->status, 0);
+    /*
+     * If the FSM is idle set the sequencer index to 0
+     * (new/restarted sequence)
+     */
+    if (GETFIELD(SPI_STS_SEQ_FSM, s->status) == SEQ_STATE_IDLE) {
+        s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, 0);
+    }
+    /*
+     * There are only 8 possible operation IDs to iterate through though
+     * some operations may cause more than one frame to be sequenced.
+     */
+    while (get_seq_index(s) < NUM_SEQ_OPS) {
+        opcode = s->seq_op[get_seq_index(s)];
+        /* Set sequencer state to decode */
+        s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_DECODE);
+        /*
+         * Only the upper nibble of the operation ID is needed to know what
+         * kind of operation is requested.
+         */
+        masked_opcode = PNV_SPI_MASKED_OPCODE(opcode);
+        switch (masked_opcode) {
+        /*
+         * Increment the operation index in each case instead of just
+         * once at the end in case an operation like the branch
+         * operation needs to change the index.
+         */
+        case SEQ_OP_STOP:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            /* A stop operation in any position stops the sequencer */
+            trace_pnv_spi_sequencer_op("STOP", get_seq_index(s));
+
+            stop = true;
+            s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE);
+            s->loop_counter_1 = 0;
+            s->loop_counter_2 = 0;
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_IDLE);
+            break;
+
+        case SEQ_OP_SELECT_SLAVE:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            trace_pnv_spi_sequencer_op("SELECT_SLAVE", get_seq_index(s));
+            /*
+             * This device currently only supports a single responder
+             * connection at position 0.  De-selecting a responder is fine
+             * and expected at the end of a sequence but selecting any
+             * responder other than 1 should cause an error.
+             */
+            s->responder_select = PNV_SPI_OPCODE_LO_NIBBLE(opcode);
+            if (s->responder_select == 0) {
+                trace_pnv_spi_shifter_done();
+                qemu_set_irq(s->cs_line[0], 1);
+                s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
+                                (get_seq_index(s) + 1));
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_DONE);
+            } else if (s->responder_select != 1) {
+                qemu_log_mask(LOG_GUEST_ERROR, "Slave selection other than 1 "
+                              "not supported, select = 0x%x\n",
+                               s->responder_select);
+                trace_pnv_spi_sequencer_stop_requested("invalid "
+                                "responder select");
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE);
+                stop = true;
+            } else {
+                /*
+                 * Only allow an FSM_START state when a responder is
+                 * selected
+                 */
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_START);
+                trace_pnv_spi_shifter_starting();
+                qemu_set_irq(s->cs_line[0], 0);
+                /*
+                 * A Shift_N2 operation is only valid after a Shift_N1
+                 * according to the spec. The spec doesn't say if that means
+                 * immediately after or just after at any point. We will track
+                 * the occurrence of a Shift_N1 to enforce this requirement in
+                 * the most generic way possible by assuming that the rule
+                 * applies once a valid responder select has occurred.
+                 */
+                s->shift_n1_done = false;
+                next_sequencer_fsm(s);
+            }
+            break;
+
+        case SEQ_OP_SHIFT_N1:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            trace_pnv_spi_sequencer_op("SHIFT_N1", get_seq_index(s));
+            /*
+             * Only allow a shift_n1 when the state is not IDLE or DONE.
+             * In either of those two cases the sequencer is not in a proper
+             * state to perform shift operations because the sequencer has:
+             * - processed a responder deselect (DONE)
+             * - processed a stop opcode (IDLE)
+             * - encountered an error (IDLE)
+             */
+            if ((GETFIELD(SPI_STS_SHIFTER_FSM, s->status) == FSM_IDLE) ||
+                (GETFIELD(SPI_STS_SHIFTER_FSM, s->status) == FSM_DONE)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "Shift_N1 not allowed in "
+                              "shifter state = 0x%llx", GETFIELD(
+                        SPI_STS_SHIFTER_FSM, s->status));
+                /*
+                 * Set sequencer FSM error bit 3 (general_SPI_status[3])
+                 * in status reg.
+                 */
+                s->status = SETFIELD(SPI_STS_GEN_STATUS_B3, s->status, 1);
+                trace_pnv_spi_sequencer_stop_requested("invalid shifter state");
+                stop = true;
+            } else {
+                /*
+                 * Look for the special case where there is a shift_n1 set for
+                 * transmit and it is followed by a shift_n2 set for transmit
+                 * AND the combined transmit length of the two operations is
+                 * less than or equal to the size of the TDR register. In this
+                 * case we want to use both this current shift_n1 opcode and the
+                 * following shift_n2 opcode to assemble the frame for
+                 * transmission to the responder without requiring a refill of
+                 * the TDR between the two operations.
+                 */
+                if ((get_seq_index(s) < (NUM_SEQ_OPS - 1)) &&
+                    (PNV_SPI_MASKED_OPCODE(s->seq_op[get_seq_index(s) + 1])
+                                == SEQ_OP_SHIFT_N2)) {
+                    send_n1_alone = false;
+                }
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
+                                FSM_SHIFT_N1);
+                stop = operation_shiftn1(s, opcode, &payload, send_n1_alone);
+                if (stop) {
+                    /*
+                     * The operation code says to stop; this can occur if:
+                     * (1) RDR is full and the N1 shift is set for receive
+                     * (2) TDR was empty at the time of the N1 shift so we need
+                     * to wait for data.
+                     * (3) Neither 1 nor 2 are occurring and we aren't sending
+                     * N1 alone and N2 counter reload is set (bit 0 of the N2
+                     * count control field).  In this case TDR_underrun will
+                     * be set and the payload has been loaded so it is ok to
+                     * advance the sequencer.
+                     */
+                    if (GETFIELD(SPI_STS_TDR_UNDERRUN, s->status)) {
+                        s->shift_n1_done = true;
+                        s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
+                                                  FSM_SHIFT_N2);
+                        s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
+                                        (get_seq_index(s) + 1));
+                    } else {
+                        /*
+                         * This is case (1) or (2) so the sequencer needs to
+                         * wait and NOT go to the next sequence yet.
+                         */
+                        s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
+                                        FSM_WAIT);
+                    }
+                } else {
+                    /* Ok to move on to the next index */
+                    s->shift_n1_done = true;
+                    next_sequencer_fsm(s);
+                }
+            }
+            break;
+
+        case SEQ_OP_SHIFT_N2:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            trace_pnv_spi_sequencer_op("SHIFT_N2", get_seq_index(s));
+            if (!s->shift_n1_done) {
+                qemu_log_mask(LOG_GUEST_ERROR, "Shift_N2 is not allowed if a "
+                              "Shift_N1 is not done, shifter state = 0x%llx",
+                              GETFIELD(SPI_STS_SHIFTER_FSM, s->status));
+                /*
+                 * The sequencer stops if an N2 shift is requested before
+                 * any N1 shift has been done.  Set sequencer FSM error
+                 * bit 3 (general_SPI_status[3]) in the status reg.
+                 */
+                s->status = SETFIELD(SPI_STS_GEN_STATUS_B3, s->status, 1);
+                trace_pnv_spi_sequencer_stop_requested("shift_n2 "
+                                    "w/no shift_n1 done");
+                stop = true;
+            } else {
+                /* Ok to do a Shift_N2 */
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
+                                FSM_SHIFT_N2);
+                stop = operation_shiftn2(s, opcode, &payload);
+                /*
+                 * If the operation code says to stop, set the shifter state
+                 * to wait and stop.
+                 */
+                if (stop) {
+                    s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status,
+                                    FSM_WAIT);
+                } else {
+                    /* Ok to move on to the next index */
+                    next_sequencer_fsm(s);
+                }
+            }
+            break;
+
+        case SEQ_OP_BRANCH_IFNEQ_RDR:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_RDR", get_seq_index(s));
+            /*
+             * The memory mapping register RDR match value is compared against
+             * the 16 rightmost bits of the RDR (potentially with masking).
+             * Because the comparison is performed against the contents of
+             * the RDR, a receive must have occurred previously; otherwise
+             * there is no data to compare, the operation cannot complete,
+             * and the sequencer stops until RDR_full is set to 1.
+             */
+            if (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1) {
+                bool rdr_matched = false;
+                rdr_matched = does_rdr_match(s);
+                if (rdr_matched) {
+                    trace_pnv_spi_RDR_match("success");
+                    /* A match occurred, increment the sequencer index. */
+                    next_sequencer_fsm(s);
+                } else {
+                    trace_pnv_spi_RDR_match("failed");
+                    /*
+                     * Branch the sequencer to the index coded into the op
+                     * code.
+                     */
+                    s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
+                                    PNV_SPI_OPCODE_LO_NIBBLE(opcode));
+                }
+                /*
+                 * Regardless of where the branch ended up we want the
+                 * sequencer to continue shifting so we have to clear
+                 * RDR_full.
+                 */
+                s->status = SETFIELD(SPI_STS_RDR_FULL, s->status, 0);
+            } else {
+                trace_pnv_spi_sequencer_stop_requested("RDR not"
+                                "full for 0x6x opcode");
+                stop = true;
+                s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_WAIT);
+            }
+            break;
+
+        case SEQ_OP_TRANSFER_TDR:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            qemu_log_mask(LOG_GUEST_ERROR, "Transfer TDR is not supported\n");
+            next_sequencer_fsm(s);
+            break;
+
+        case SEQ_OP_BRANCH_IFNEQ_INC_1:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_1", get_seq_index(s));
+            /*
+             * The spec says the loop should execute count_compare + 1 times.
+             * However we learned from engineering that we really only loop
+             * count_compare times; count_compare = 0 makes this opcode a
+             * no-op.
+             */
+            if (s->loop_counter_1 !=
+                GETFIELD(SPI_CTR_CFG_CMP1, s->regs[SPI_CTR_CFG_REG])) {
+                /*
+                 * Next index is the lower nibble of the branch operation ID;
+                 * an index at or beyond the sequencer_operation_reg boundary
+                 * simply terminates the sequencer loop.
+                 */
+                s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status,
+                                PNV_SPI_OPCODE_LO_NIBBLE(opcode));
+                s->loop_counter_1++;
+            } else {
+                /* Continue to next index if loop counter is reached */
+                next_sequencer_fsm(s);
+            }
+            break;
+
+        case SEQ_OP_BRANCH_IFNEQ_INC_2:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_2", get_seq_index(s));
+            uint8_t condition2 = GETFIELD(SPI_CTR_CFG_CMP2,
+                              s->regs[SPI_CTR_CFG_REG]);
+            /*
+             * The spec says the loop should execute count_compare + 1 times.
+             * However we learned from engineering that we really only loop
+             * count_compare times; count_compare = 0 makes this opcode a
+             * no-op.
+             */
+            if (s->loop_counter_2 != condition2) {
+                /*
+                 * Next index is the lower nibble of the branch operation ID;
+                 * an index at or beyond the sequencer_operation_reg boundary
+                 * simply terminates the sequencer loop.
+                 */
+                s->status = SETFIELD(SPI_STS_SEQ_INDEX,
+                                s->status, PNV_SPI_OPCODE_LO_NIBBLE(opcode));
+                s->loop_counter_2++;
+            } else {
+                /* Continue to next index if loop counter is reached */
+                next_sequencer_fsm(s);
+            }
+            break;
+
+        default:
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE);
+            /* Ignore unsupported operations. */
+            next_sequencer_fsm(s);
+            break;
+        } /* end of switch */
+        /*
+         * If we used all 8 opcodes without seeing a 00 - STOP in the sequence
+         * we need to go ahead and end things as if there was a STOP at the
+         * end.
+         */
+        if (get_seq_index(s) == NUM_SEQ_OPS) {
+            /* All 8 opcodes completed, sequencer idling */
+            s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE);
+            s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, 0);
+            s->loop_counter_1 = 0;
+            s->loop_counter_2 = 0;
+            s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_IDLE);
+            break;
+        }
+        /* Break the loop if a stop was requested */
+        if (stop) {
+            break;
+        }
+    } /* end of while */
+    return;
+} /* end of operation_sequencer() */
+
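+/*
+ * Illustrative sequencer program (opcode encodings assumed from the
+ * forms described above, e.g. Shift_N1 = 0x3M, Shift_N2 = 0x4M,
+ * STOP = 0x00): a simple write-then-read sequence could be
+ *
+ *   seq_op[0] = select responder 1
+ *   seq_op[1] = 0x32   Shift_N1, Implicit mode, 2 tx bytes from the TDR
+ *   seq_op[2] = 0x44   Shift_N2, Implicit mode, 4 rx bytes into the RDR
+ *   seq_op[3] = select responder 0 (de-select, shifter goes to FSM_DONE)
+ *   seq_op[4] = 0x00   STOP
+ *
+ * A write to SPI_XMIT_DATA_REG fills the TDR and kicks the sequencer;
+ * the received N2 bytes are then read back from SPI_RCV_DATA_REG.
+ */
+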
+/*
+ * The SPIC engine and its internal sequencer can be interrupted and reset by
+ * a hardware signal, the sbe_spicst_hard_reset bits from Pervasive
+ * Miscellaneous Register of sbe_register_bo device.
+ * Reset immediately aborts any SPI transaction in progress and returns the
+ * sequencer and state machines to idle state.
+ * The configuration register values are not changed. The status register is
+ * not reset. The engine registers are not reset.
+ * The SPIC engine reset does not have any effect on the attached devices.
+ * Reset handling of any attached devices is beyond the scope of the engine.
+ */
+static void do_reset(DeviceState *dev)
+{
+    PnvSpi *s = PNV_SPI(dev);
+    DeviceState *ssi_dev;
+
+    trace_pnv_spi_reset();
+
+    /* Connect cs irq */
+    ssi_dev = ssi_get_cs(s->ssi_bus, 0);
+    if (ssi_dev) {
+        qemu_irq cs_line = qdev_get_gpio_in_named(ssi_dev, SSI_GPIO_CS, 0);
+        qdev_connect_gpio_out_named(DEVICE(s), "cs", 0, cs_line);
+    }
+
+    /* Reset all N1 and N2 counters, and other state */
+    s->N2_bits = 0;
+    s->N2_bytes = 0;
+    s->N2_tx = 0;
+    s->N2_rx = 0;
+    s->N1_bits = 0;
+    s->N1_bytes = 0;
+    s->N1_tx = 0;
+    s->N1_rx = 0;
+    s->loop_counter_1 = 0;
+    s->loop_counter_2 = 0;
+    /* Disconnected from responder */
+    qemu_set_irq(s->cs_line[0], 1);
+}
+
+static uint64_t pnv_spi_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvSpi *s = PNV_SPI(opaque);
+    uint32_t reg = addr >> 3;
+    uint64_t val = ~0ull;
+
+    switch (reg) {
+    case ERROR_REG:
+    case SPI_CTR_CFG_REG:
+    case CONFIG_REG1:
+    case SPI_CLK_CFG_REG:
+    case SPI_MM_REG:
+    case SPI_XMIT_DATA_REG:
+        val = s->regs[reg];
+        break;
+    case SPI_RCV_DATA_REG:
+        val = s->regs[reg];
+        trace_pnv_spi_read_RDR(val);
+        s->status = SETFIELD(SPI_STS_RDR_FULL, s->status, 0);
+        if (GETFIELD(SPI_STS_SHIFTER_FSM, s->status) == FSM_WAIT) {
+            trace_pnv_spi_start_sequencer();
+            operation_sequencer(s);
+        }
+        break;
+    case SPI_SEQ_OP_REG:
+        val = 0;
+        for (int i = 0; i < PNV_SPI_REG_SIZE; i++) {
+            val = (val << 8) | s->seq_op[i];
+        }
+        break;
+    case SPI_STS_REG:
+        val = s->status;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi_regs: Invalid xscom "
+                 "read at 0x%" PRIx32 "\n", reg);
+    }
+
+    trace_pnv_spi_read(addr, val);
+    return val;
+}
+
+static void pnv_spi_xscom_write(void *opaque, hwaddr addr,
+                                 uint64_t val, unsigned size)
+{
+    PnvSpi *s = PNV_SPI(opaque);
+    uint32_t reg = addr >> 3;
+
+    trace_pnv_spi_write(addr, val);
+
+    switch (reg) {
+    case ERROR_REG:
+    case SPI_CTR_CFG_REG:
+    case CONFIG_REG1:
+    case SPI_MM_REG:
+    case SPI_RCV_DATA_REG:
+        s->regs[reg] = val;
+        break;
+    case SPI_CLK_CFG_REG:
+        /*
+         * To reset the SPI controller write the sequence 0x5 0xA to
+         * reset_control field
+         */
+        if ((GETFIELD(SPI_CLK_CFG_RST_CTRL, s->regs[SPI_CLK_CFG_REG]) == 0x5)
+             && (GETFIELD(SPI_CLK_CFG_RST_CTRL, val) == 0xA)) {
+            /* SPI controller reset sequence completed, resetting */
+            s->regs[reg] = SPI_CLK_CFG_HARD_RST;
+        } else {
+            s->regs[reg] = val;
+        }
+        break;
+    case SPI_XMIT_DATA_REG:
+        /*
+         * Writing to the transmit data register causes the transmit data
+         * register full status bit in the status register to be set.  Writing
+         * when the transmit data register full status bit is already set
+         * causes a "Resource Not Available" condition.  This is not possible
+         * in the model since writes to this register are not asynchronous to
+         * the operation sequence as they would be in hardware.
+         */
+        s->regs[reg] = val;
+        trace_pnv_spi_write_TDR(val);
+        s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 1);
+        s->status = SETFIELD(SPI_STS_TDR_UNDERRUN, s->status, 0);
+        trace_pnv_spi_start_sequencer();
+        operation_sequencer(s);
+        break;
+    case SPI_SEQ_OP_REG:
+        for (int i = 0; i < PNV_SPI_REG_SIZE; i++) {
+            s->seq_op[i] = (val >> (56 - i * 8)) & 0xFF;
+        }
+        break;
+    case SPI_STS_REG:
+        /* other fields are ignored on write */
+        s->status = SETFIELD(SPI_STS_RDR_OVERRUN, s->status,
+                                  GETFIELD(SPI_STS_RDR, val));
+        s->status = SETFIELD(SPI_STS_TDR_OVERRUN, s->status,
+                                  GETFIELD(SPI_STS_TDR, val));
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi_regs: Invalid xscom "
+                 "write at 0x%" PRIx32 "\n", reg);
+    }
+    return;
+}
+
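+/*
+ * Illustrative note: with SPI_CLK_CFG_RST_CTRL occupying
+ * PPC_BITMASK(24, 27), the 0x5/0xA reset handshake above corresponds,
+ * assuming all other fields are zero, to writing 0x0000005000000000
+ * and then 0x000000A000000000 to SPI_CLK_CFG_REG.
+ */
+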
+static const MemoryRegionOps pnv_spi_xscom_ops = {
+    .read = pnv_spi_xscom_read,
+    .write = pnv_spi_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static Property pnv_spi_properties[] = {
+    DEFINE_PROP_UINT32("spic_num", PnvSpi, spic_num, 0),
+    DEFINE_PROP_UINT8("transfer_len", PnvSpi, transfer_len, 4),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_spi_realize(DeviceState *dev, Error **errp)
+{
+    PnvSpi *s = PNV_SPI(dev);
+    g_autofree char *name = g_strdup_printf(TYPE_PNV_SPI_BUS ".%d",
+                    s->spic_num);
+    s->ssi_bus = ssi_create_bus(dev, name);
+    s->cs_line = g_new0(qemu_irq, 1);
+    qdev_init_gpio_out_named(DEVICE(s), s->cs_line, "cs", 1);
+
+    /* spi scoms */
+    pnv_xscom_region_init(&s->xscom_spic_regs, OBJECT(s), &pnv_spi_xscom_ops,
+                          s, "xscom-spi", PNV10_XSCOM_PIB_SPIC_SIZE);
+}
+
+static int pnv_spi_dt_xscom(PnvXScomInterface *dev, void *fdt,
+                             int offset)
+{
+    PnvSpi *s = PNV_SPI(dev);
+    g_autofree char *name;
+    int s_offset;
+    const char compat[] = "ibm,power10-spi";
+    uint32_t spic_pcba = PNV10_XSCOM_PIB_SPIC_BASE +
+        s->spic_num * PNV10_XSCOM_PIB_SPIC_SIZE;
+    uint32_t reg[] = {
+        cpu_to_be32(spic_pcba),
+        cpu_to_be32(PNV10_XSCOM_PIB_SPIC_SIZE)
+    };
+    name = g_strdup_printf("pnv_spi@%x", spic_pcba);
+    s_offset = fdt_add_subnode(fdt, offset, name);
+    _FDT(s_offset);
+
+    _FDT(fdt_setprop(fdt, s_offset, "reg", reg, sizeof(reg)));
+    _FDT(fdt_setprop(fdt, s_offset, "compatible", compat, sizeof(compat)));
+    _FDT((fdt_setprop_cell(fdt, s_offset, "spic_num#", s->spic_num)));
+    return 0;
+}
+
+static void pnv_spi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PnvXScomInterfaceClass *xscomc = PNV_XSCOM_INTERFACE_CLASS(klass);
+
+    xscomc->dt_xscom = pnv_spi_dt_xscom;
+
+    dc->desc = "PowerNV SPI";
+    dc->realize = pnv_spi_realize;
+    dc->reset = do_reset;
+    device_class_set_props(dc, pnv_spi_properties);
+}
+
+static const TypeInfo pnv_spi_info = {
+    .name          = TYPE_PNV_SPI,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(PnvSpi),
+    .class_init    = pnv_spi_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { }
+    }
+};
+
+static void pnv_spi_register_types(void)
+{
+    type_register_static(&pnv_spi_info);
+}
+
+type_init(pnv_spi_register_types);
diff --git a/hw/ssi/trace-events b/hw/ssi/trace-events
index 7b5ad6a..089d269 100644
--- a/hw/ssi/trace-events
+++ b/hw/ssi/trace-events
@@ -32,3 +32,24 @@
 ibex_spi_host_transfer(uint32_t tx_data, uint32_t rx_data) "tx_data: 0x%" PRIx32 " rx_data: @0x%" PRIx32
 ibex_spi_host_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64
 ibex_spi_host_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size %u:"
+
+# pnv_spi.c
+pnv_spi_read(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64
+pnv_spi_write(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64
+pnv_spi_read_RDR(uint64_t val) "data extracted = 0x%" PRIx64
+pnv_spi_write_TDR(uint64_t val) "data written = 0x%" PRIx64
+pnv_spi_start_sequencer(void) ""
+pnv_spi_reset(void) "spic engine sequencer configuration and spi communication"
+pnv_spi_sequencer_op(const char* op, uint8_t index) "%s at index = 0x%x"
+pnv_spi_shifter_starting(void) "pull CS line low"
+pnv_spi_shifter_done(void) "pull the CS line high"
+pnv_spi_log_Ncounts(uint8_t N1_bits, uint8_t N1_bytes, uint8_t N1_tx, uint8_t N1_rx, uint8_t N2_bits, uint8_t N2_bytes, uint8_t N2_tx, uint8_t N2_rx) "N1_bits = %d, N1_bytes = %d, N1_tx = %d, N1_rx = %d, N2_bits = %d, N2_bytes = %d, N2_tx = %d, N2_rx = %d"
+pnv_spi_tx_append(const char* frame, uint8_t byte, uint8_t tdr_index) "%s = 0x%2.2x to payload from TDR at index %d"
+pnv_spi_tx_append_FF(const char* frame) "%s to Payload"
+pnv_spi_tx_request(const char* frame, uint32_t payload_len) "%s, payload len = %d"
+pnv_spi_rx_received(uint32_t payload_len) "payload len = %d"
+pnv_spi_rx_read_N1frame(void) ""
+pnv_spi_rx_read_N2frame(void) ""
+pnv_spi_shift_rx(uint8_t byte, uint32_t index) "byte = 0x%2.2x into RDR from payload index %d"
+pnv_spi_sequencer_stop_requested(const char* reason) "due to %s"
+pnv_spi_RDR_match(const char* result) "%s"
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 240ee04..2e1b499 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -35,6 +35,8 @@
 void cpu_list_unlock(void);
 unsigned int cpu_list_generation_id_get(void);
 
+int cpu_get_free_index(void);
+
 void tcg_iommu_init_notifier_list(CPUState *cpu);
 void tcg_iommu_free_notifier_list(CPUState *cpu);
 
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 476b136..fcb6699 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -76,6 +76,9 @@
     /*< public >*/
     const char *compat;
     int compat_size;
+    int max_smt_threads;
+    bool has_lpar_per_thread;
+    bool quirk_tb_big_core;
 
     void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt);
     void (*i2c_init)(PnvMachineState *pnv);
@@ -100,6 +103,9 @@
     PnvPnor      *pnor;
 
     hwaddr       fw_load_addr;
+
+    bool         big_core;
+    bool         lpar_per_core;
 };
 
 PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id);
@@ -108,6 +114,8 @@
 #define PNV_FDT_ADDR          0x01000000
 #define PNV_TIMEBASE_FREQ     512000000ULL
 
+void pnv_cpu_do_nmi_resume(CPUState *cs);
+
 /*
  * BMC helpers
  */
diff --git a/include/hw/ppc/pnv_adu.h b/include/hw/ppc/pnv_adu.h
new file mode 100644
index 0000000..f9dbd8c
--- /dev/null
+++ b/include/hw/ppc/pnv_adu.h
@@ -0,0 +1,32 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of some ADU behaviour
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PPC_PNV_ADU_H
+#define PPC_PNV_ADU_H
+
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_lpc.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_PNV_ADU "pnv-adu"
+
+OBJECT_DECLARE_TYPE(PnvADU, PnvADUClass, PNV_ADU)
+
+struct PnvADU {
+    DeviceState xd;
+
+    /* LPCMC (LPC Master Controller) access engine */
+    PnvLpcController *lpc;
+    uint64_t     lpc_base_reg;
+    uint64_t     lpc_cmd_reg;
+    uint64_t     lpc_data_reg;
+
+    MemoryRegion xscom_regs;
+};
+
+#endif /* PPC_PNV_ADU_H */
diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index a4ed17a..24ce37a 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -2,10 +2,12 @@
 #define PPC_PNV_CHIP_H
 
 #include "hw/pci-host/pnv_phb4.h"
+#include "hw/ppc/pnv_adu.h"
 #include "hw/ppc/pnv_chiptod.h"
 #include "hw/ppc/pnv_core.h"
 #include "hw/ppc/pnv_homer.h"
 #include "hw/ppc/pnv_n1_chiplet.h"
+#include "hw/ssi/pnv_spi.h"
 #include "hw/ppc/pnv_lpc.h"
 #include "hw/ppc/pnv_occ.h"
 #include "hw/ppc/pnv_psi.h"
@@ -26,6 +28,8 @@
     uint64_t     ram_start;
     uint64_t     ram_size;
 
+    bool         big_core;
+    bool         lpar_per_core;
     uint32_t     nr_cores;
     uint32_t     nr_threads;
     uint64_t     cores_mask;
@@ -77,6 +81,7 @@
     PnvChip      parent_obj;
 
     /*< public >*/
+    PnvADU       adu;
     PnvXive      xive;
     Pnv9Psi      psi;
     PnvLpcController lpc;
@@ -110,6 +115,7 @@
     PnvChip      parent_obj;
 
     /*< public >*/
+    PnvADU       adu;
     PnvXive2     xive;
     Pnv9Psi      psi;
     PnvLpcController lpc;
@@ -118,6 +124,8 @@
     PnvSBE       sbe;
     PnvHomer     homer;
     PnvN1Chiplet     n1_chiplet;
+#define PNV10_CHIP_MAX_PIB_SPIC 6
+    PnvSpi pib_spic[PNV10_CHIP_MAX_PIB_SPIC];
 
     uint32_t     nr_quads;
     PnvQuad      *quads;
@@ -131,6 +139,7 @@
 
 #define PNV10_PIR2FUSEDCORE(pir) (((pir) >> 3) & 0xf)
 #define PNV10_PIR2CHIP(pir)      (((pir) >> 8) & 0x7f)
+#define PNV10_PIR2THREAD(pir)    (((pir) & 0x7f))
 
 struct PnvChipClass {
     /*< private >*/
@@ -147,7 +156,9 @@
 
     DeviceRealize parent_realize;
 
-    uint32_t (*chip_pir)(PnvChip *chip, uint32_t core_id, uint32_t thread_id);
+    /* Get PIR and TIR values for a CPU thread identified by core/thread id */
+    void (*get_pir_tir)(PnvChip *chip, uint32_t core_id, uint32_t thread_id,
+                         uint32_t *pir, uint32_t *tir);
     void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp);
     void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu);
     void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu);
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index c6d62fd..d8afb4f 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -25,6 +25,27 @@
 #include "hw/ppc/pnv.h"
 #include "qom/object.h"
 
+/* Per-core ChipTOD / TimeBase state */
+typedef struct PnvCoreTODState {
+    /*
+     * POWER10 DD2.0 - big core TFMR drives the state machine on the even
+     * small core. Skiboot has a workaround that targets the even small core
+     * for CHIPTOD_TO_TB ops.
+     */
+    bool big_core_quirk;
+
+    int tb_ready_for_tod; /* core TB ready to receive TOD from chiptod */
+    int tod_sent_to_tb;   /* chiptod sent TOD to the core TB */
+
+    /*
+     * "Timers" for async TBST events are simulated by mfTFAC because TFAC
+     * is polled for such events. These are just used to ensure firmware
+     * performs the polling at least a few times.
+     */
+    int tb_state_timer;
+    int tb_sync_pulse_timer;
+} PnvCoreTODState;
+
 #define TYPE_PNV_CORE "powernv-cpu-core"
 OBJECT_DECLARE_TYPE(PnvCore, PnvCoreClass,
                     PNV_CORE)
@@ -35,9 +56,15 @@
 
     /*< public >*/
     PowerPCCPU **threads;
+    bool big_core;
+    bool lpar_per_core;
     uint32_t pir;
     uint32_t hwid;
     uint64_t hrmor;
+
+    target_ulong scratch[8]; /* SPRC/SPRD indirect SCRATCH registers */
+    PnvCoreTODState tod_state;
+
     PnvChip *chip;
 
     MemoryRegion xscom_regs;
@@ -54,6 +81,7 @@
 #define PNV_CORE_TYPE_NAME(cpu_model) cpu_model PNV_CORE_TYPE_SUFFIX
 
 typedef struct PnvCPUState {
+    PnvCore *pnv_core;
     Object *intc;
 } PnvCPUState;
 
@@ -82,6 +110,9 @@
 struct PnvQuad {
     DeviceState parent_obj;
 
+    bool special_wakeup_done;
+    bool special_wakeup[4];
+
     uint32_t quad_id;
     MemoryRegion xscom_regs;
     MemoryRegion xscom_qme_regs;
diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h
index 5d22c45..174add4 100644
--- a/include/hw/ppc/pnv_lpc.h
+++ b/include/hw/ppc/pnv_lpc.h
@@ -23,6 +23,7 @@
 #include "exec/memory.h"
 #include "hw/ppc/pnv.h"
 #include "hw/qdev-core.h"
+#include "hw/isa/isa.h" /* For ISA_NUM_IRQS */
 
 #define TYPE_PNV_LPC "pnv-lpc"
 typedef struct PnvLpcClass PnvLpcClass;
@@ -73,6 +74,9 @@
     uint32_t opb_irq_pol;
     uint32_t opb_irq_input;
 
+    /* LPC device IRQ state */
+    uint32_t lpc_hc_irq_inputs;
+
     /* LPC HC registers */
     uint32_t lpc_hc_fw_seg_idsel;
     uint32_t lpc_hc_fw_rd_acc_size;
@@ -84,8 +88,19 @@
     /* XSCOM registers */
     MemoryRegion xscom_regs;
 
+    /*
+     * In P8, ISA irqs are combined with internal sources to drive the
+     * LPCHC interrupt output. P9 ISA irqs raise one of 4 lines that
+     * drive PSI SERIRQ irqs, routing according to OPB routing registers.
+     */
+    bool psi_has_serirq;
+
     /* PSI to generate interrupts */
-    qemu_irq psi_irq;
+    qemu_irq psi_irq_lpchc;
+
+    /* P9 serirq lines and irq routing table */
+    qemu_irq psi_irq_serirq[4];
+    int irq_to_serirq_route[ISA_NUM_IRQS];
 };
 
 struct PnvLpcClass {
@@ -94,6 +109,11 @@
     DeviceRealize parent_realize;
 };
 
+bool pnv_lpc_opb_read(PnvLpcController *lpc, uint32_t addr,
+                      uint8_t *data, int sz);
+bool pnv_lpc_opb_write(PnvLpcController *lpc, uint32_t addr,
+                       uint8_t *data, int sz);
+
 ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp);
 int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset,
                uint64_t lpcm_addr, uint64_t lpcm_size);
diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index 6209e18..648388a 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -21,9 +21,9 @@
 #define PPC_PNV_XSCOM_H
 
 #include "exec/memory.h"
-#include "hw/ppc/pnv.h"
 
 typedef struct PnvXScomInterface PnvXScomInterface;
+typedef struct PnvChip PnvChip;
 
 #define TYPE_PNV_XSCOM_INTERFACE "pnv-xscom-interface"
 #define PNV_XSCOM_INTERFACE(obj) \
@@ -82,6 +82,9 @@
 #define PNV_XSCOM_PBCQ_SPCI_BASE  0x9013c00
 #define PNV_XSCOM_PBCQ_SPCI_SIZE  0x5
 
+#define PNV9_XSCOM_ADU_BASE       0x0090000
+#define PNV9_XSCOM_ADU_SIZE       0x55
+
 /*
  * Layout of the XSCOM PCB addresses (POWER 9)
  */
@@ -128,6 +131,9 @@
 #define PNV9_XSCOM_PEC_PCI_STK1   0x140
 #define PNV9_XSCOM_PEC_PCI_STK2   0x180
 
+#define PNV10_XSCOM_ADU_BASE      0x0090000
+#define PNV10_XSCOM_ADU_SIZE      0x55
+
 /*
  * Layout of the XSCOM PCB addresses (POWER 10)
  */
@@ -194,6 +200,9 @@
 #define PNV10_XSCOM_PEC_PCI_BASE   0x8010800 /* index goes upwards ... */
 #define PNV10_XSCOM_PEC_PCI_SIZE   0x200
 
+#define PNV10_XSCOM_PIB_SPIC_BASE 0xc0000
+#define PNV10_XSCOM_PIB_SPIC_SIZE 0x20
+
 void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr);
 int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset,
                  uint64_t xscom_base, uint64_t xscom_size,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 4aaf23d..f6de3e9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -1004,6 +1004,7 @@
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
 extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
+extern const VMStateDescription vmstate_spapr_cap_ail_mode_3;
 extern const VMStateDescription vmstate_spapr_wdt;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
diff --git a/include/hw/ppc/xive2_regs.h b/include/hw/ppc/xive2_regs.h
index 4e5e17c..4349d00 100644
--- a/include/hw/ppc/xive2_regs.h
+++ b/include/hw/ppc/xive2_regs.h
@@ -97,6 +97,7 @@
         uint32_t       w6;
 #define END2_W6_FORMAT_BIT         PPC_BIT32(0)
 #define END2_W6_IGNORE             PPC_BIT32(1)
+#define END2_W6_CROWD              PPC_BIT32(2)
 #define END2_W6_VP_BLOCK           PPC_BITMASK32(4, 7)
 #define END2_W6_VP_OFFSET          PPC_BITMASK32(8, 31)
 #define END2_W6_VP_OFFSET_GEN1     PPC_BITMASK32(13, 31)
@@ -111,6 +112,8 @@
 #define xive2_end_is_notify(end)                \
     (be32_to_cpu((end)->w0) & END2_W0_UCOND_NOTIFY)
 #define xive2_end_is_backlog(end)  (be32_to_cpu((end)->w0) & END2_W0_BACKLOG)
+#define xive2_end_is_precluded_escalation(end)          \
+    (be32_to_cpu((end)->w0) & END2_W0_PRECL_ESC_CTL)
 #define xive2_end_is_escalate(end)                      \
     (be32_to_cpu((end)->w0) & END2_W0_ESCALATE_CTL)
 #define xive2_end_is_uncond_escalation(end)              \
@@ -123,6 +126,10 @@
     (be32_to_cpu((end)->w0) & END2_W0_FIRMWARE1)
 #define xive2_end_is_firmware2(end)              \
     (be32_to_cpu((end)->w0) & END2_W0_FIRMWARE2)
+#define xive2_end_is_ignore(end)                \
+    (be32_to_cpu((end)->w6) & END2_W6_IGNORE)
+#define xive2_end_is_crowd(end)                 \
+    (be32_to_cpu((end)->w6) & END2_W6_CROWD)
 
 static inline uint64_t xive2_end_qaddr(Xive2End *end)
 {
@@ -194,6 +201,8 @@
     return (cam_line >> XIVE2_NVP_SHIFT) & 0xf;
 }
 
+void xive2_nvp_pic_print_info(Xive2Nvp *nvp, uint32_t nvp_idx, GString *buf);
+
 /*
  * Notification Virtual Group or Crowd (NVG/NVC)
  */
diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h
new file mode 100644
index 0000000..8815f67
--- /dev/null
+++ b/include/hw/ssi/pnv_spi.h
@@ -0,0 +1,67 @@
+/*
+ * QEMU PowerPC SPI model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This model supports a connection to a single SPI responder.
+ * Introduced for P10 to provide access to SPI SEEPROMs, a TPM, flash
+ * devices and an ADC controller.
+ *
+ * All SPI function control is mapped into the SPI register space to enable
+ * full control by firmware.
+ *
+ * The SPI controller contains a sequencer and a shift engine.  The shift
+ * engine performs serialization and de-serialization under the control of
+ * the sequencer and according to the setup defined in the configuration
+ * registers, while the sequencer implements the main control logic.
+ */
+
+#ifndef PPC_PNV_SPI_H
+#define PPC_PNV_SPI_H
+
+#include "hw/ssi/ssi.h"
+#include "hw/sysbus.h"
+
+#define TYPE_PNV_SPI "pnv-spi"
+OBJECT_DECLARE_SIMPLE_TYPE(PnvSpi, PNV_SPI)
+
+#define PNV_SPI_REG_SIZE 8
+#define PNV_SPI_REGS 7
+
+#define TYPE_PNV_SPI_BUS "pnv-spi-bus"
+typedef struct PnvSpi {
+    SysBusDevice parent_obj;
+
+    SSIBus *ssi_bus;
+    qemu_irq *cs_line;
+    MemoryRegion    xscom_spic_regs;
+    /* SPI object number */
+    uint32_t        spic_num;
+    uint8_t         transfer_len;
+    uint8_t         responder_select;
+    /* To verify if shift_n1 happens prior to shift_n2 */
+    bool            shift_n1_done;
+    /* Loop counter for branch operation opcode Ex/Fx */
+    uint8_t         loop_counter_1;
+    uint8_t         loop_counter_2;
+    /* N1/N2_bits specify the size of the N1/N2 segment of a frame in bits. */
+    uint8_t         N1_bits;
+    uint8_t         N2_bits;
+    /* Number of bytes in a payload for the N1/N2 frame segment. */
+    uint8_t         N1_bytes;
+    uint8_t         N2_bytes;
+    /* Number of N1/N2 bytes marked for transmit */
+    uint8_t         N1_tx;
+    uint8_t         N2_tx;
+    /* Number of N1/N2 bytes marked for receive */
+    uint8_t         N1_rx;
+    uint8_t         N2_rx;
+
+    /* SPI registers */
+    uint64_t        regs[PNV_SPI_REGS];
+    uint8_t         seq_op[PNV_SPI_REG_SIZE];
+    uint64_t        status;
+} PnvSpi;
+#endif /* PPC_PNV_SPI_H */
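
As a rough sketch of how board code might instantiate the controller above:
the qdev calls are standard QEMU API, but the "spic_num" property name is an
assumption for illustration; this header does not declare the device's
property list.

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "hw/qdev-properties.h"
    #include "hw/ssi/pnv_spi.h"

    /* Hypothetical hookup; the "spic_num" property name is assumed. */
    static void create_pnv_spi_example(void)
    {
        DeviceState *dev = qdev_new(TYPE_PNV_SPI);

        qdev_prop_set_uint32(dev, "spic_num", 2);
        sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
    }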
diff --git a/include/hw/ssi/pnv_spi_regs.h b/include/hw/ssi/pnv_spi_regs.h
new file mode 100644
index 0000000..596e2c1
--- /dev/null
+++ b/include/hw/ssi/pnv_spi_regs.h
@@ -0,0 +1,133 @@
+/*
+ * QEMU PowerPC SPI model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PNV_SPI_CONTROLLER_REGS_H
+#define PNV_SPI_CONTROLLER_REGS_H
+
+/*
+ * Macros copied from target/ppc/cpu.h, since that target-specific header
+ * cannot be included here.
+ */
+#define PPC_BIT(bit)            (0x8000000000000000ULL >> (bit))
+#define PPC_BIT8(bit)           (0x80 >> (bit))
+#define PPC_BITMASK(bs, be)     ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+#define PPC_BITMASK8(bs, be)    ((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs))
+#define MASK_TO_LSH(m)          (__builtin_ffsll(m) - 1)
+#define GETFIELD(m, v)          (((v) & (m)) >> MASK_TO_LSH(m))
+#define SETFIELD(m, v, val) \
+        (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))
+
+/* Error Register */
+#define ERROR_REG               0x00
+
+/* counter_config_reg */
+#define SPI_CTR_CFG_REG         0x01
+#define SPI_CTR_CFG_N1          PPC_BITMASK(0, 7)
+#define SPI_CTR_CFG_N2          PPC_BITMASK(8, 15)
+#define SPI_CTR_CFG_CMP1        PPC_BITMASK(24, 31)
+#define SPI_CTR_CFG_CMP2        PPC_BITMASK(32, 39)
+#define SPI_CTR_CFG_N1_CTRL_B1  PPC_BIT(49)
+#define SPI_CTR_CFG_N1_CTRL_B2  PPC_BIT(50)
+#define SPI_CTR_CFG_N1_CTRL_B3  PPC_BIT(51)
+#define SPI_CTR_CFG_N2_CTRL_B0  PPC_BIT(52)
+#define SPI_CTR_CFG_N2_CTRL_B1  PPC_BIT(53)
+#define SPI_CTR_CFG_N2_CTRL_B2  PPC_BIT(54)
+#define SPI_CTR_CFG_N2_CTRL_B3  PPC_BIT(55)
+
+/* config_reg */
+#define CONFIG_REG1             0x02
+
+/* clock_config_reset_control_ecc_enable_reg */
+#define SPI_CLK_CFG_REG         0x03
+#define SPI_CLK_CFG_HARD_RST    0x0084000000000000
+#define SPI_CLK_CFG_RST_CTRL    PPC_BITMASK(24, 27)
+#define SPI_CLK_CFG_ECC_EN      PPC_BIT(28)
+#define SPI_CLK_CFG_ECC_CTRL    PPC_BITMASK(29, 30)
+
+/* memory_mapping_reg */
+#define SPI_MM_REG              0x04
+#define SPI_MM_RDR_MATCH_VAL    PPC_BITMASK(32, 47)
+#define SPI_MM_RDR_MATCH_MASK   PPC_BITMASK(48, 63)
+
+/* transmit_data_reg */
+#define SPI_XMIT_DATA_REG       0x05
+
+/* receive_data_reg */
+#define SPI_RCV_DATA_REG        0x06
+
+/* sequencer_operation_reg */
+#define SPI_SEQ_OP_REG          0x07
+
+/* status_reg */
+#define SPI_STS_REG             0x08
+#define SPI_STS_RDR_FULL        PPC_BIT(0)
+#define SPI_STS_RDR_OVERRUN     PPC_BIT(1)
+#define SPI_STS_RDR_UNDERRUN    PPC_BIT(2)
+#define SPI_STS_TDR_FULL        PPC_BIT(4)
+#define SPI_STS_TDR_OVERRUN     PPC_BIT(5)
+#define SPI_STS_TDR_UNDERRUN    PPC_BIT(6)
+#define SPI_STS_SEQ_FSM         PPC_BITMASK(8, 15)
+#define SPI_STS_SHIFTER_FSM     PPC_BITMASK(16, 27)
+#define SPI_STS_SEQ_INDEX       PPC_BITMASK(28, 31)
+#define SPI_STS_GEN_STATUS_B3   PPC_BIT(35)
+#define SPI_STS_RDR             PPC_BITMASK(1, 3)
+#define SPI_STS_TDR             PPC_BITMASK(5, 7)
+
+/*
+ * Shifter states
+ *
+ * These are the same values defined for the Shifter FSM field of the
+ * status register.  It's a 12 bit field so we will represent it as three
+ * nibbles in the constants.
+ *
+ * These are shifter_fsm values
+ *
+ * Status reg bits 16-27 -> field bits 0-11
+ * bits 0,1,2,5 unused/reserved
+ * bit 4 crc shift in (unused)
+ * bit 8 crc shift out (unused)
+ */
+
+#define FSM_DONE                        0x100   /* bit 3 */
+#define FSM_SHIFT_N2                    0x020   /* bit 6 */
+#define FSM_WAIT                        0x010   /* bit 7 */
+#define FSM_SHIFT_N1                    0x004   /* bit 9 */
+#define FSM_START                       0x002   /* bit 10 */
+#define FSM_IDLE                        0x001   /* bit 11 */
+
+/*
+ * Sequencer states
+ *
+ * These are sequencer_fsm values
+ *
+ * Status reg bits 8-15 -> field bits 0-7
+ * bits 0-3 unused/reserved
+ */
+#define SEQ_STATE_INDEX_INCREMENT       0x08    /* bit 4 */
+#define SEQ_STATE_EXECUTE               0x04    /* bit 5 */
+#define SEQ_STATE_DECODE                0x02    /* bit 6 */
+#define SEQ_STATE_IDLE                  0x01    /* bit 7 */
+
+/*
+ * These are the supported sequencer operations.
+ * Only the upper nibble is significant because for many operations
+ * the lower nibble is a variable specific to the operation.
+ */
+#define SEQ_OP_STOP                     0x00
+#define SEQ_OP_SELECT_SLAVE             0x10
+#define SEQ_OP_SHIFT_N1                 0x30
+#define SEQ_OP_SHIFT_N2                 0x40
+#define SEQ_OP_BRANCH_IFNEQ_RDR         0x60
+#define SEQ_OP_TRANSFER_TDR             0xC0
+#define SEQ_OP_BRANCH_IFNEQ_INC_1       0xE0
+#define SEQ_OP_BRANCH_IFNEQ_INC_2       0xF0
+#define NUM_SEQ_OPS                     8
+
+#endif
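
The PPC_BIT/GETFIELD/SETFIELD macros above use IBM (big-endian) bit
numbering, where bit 0 is the most significant bit of the doubleword. A
standalone sketch (GCC/Clang, not part of the patch) showing how a field such
as SPI_STS_SEQ_INDEX is packed and unpacked:

    #include <stdint.h>
    #include <stdio.h>

    #define PPC_BIT(bit)            (0x8000000000000000ULL >> (bit))
    #define PPC_BITMASK(bs, be)     ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
    #define MASK_TO_LSH(m)          (__builtin_ffsll(m) - 1)
    #define GETFIELD(m, v)          (((v) & (m)) >> MASK_TO_LSH(m))
    #define SETFIELD(m, v, val) \
            (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))

    #define SPI_STS_SEQ_INDEX       PPC_BITMASK(28, 31)

    int main(void)
    {
        uint64_t status = 0;

        /* Pack sequencer index 5 into bits 28-31 (IBM numbering), unpack it. */
        status = SETFIELD(SPI_STS_SEQ_INDEX, status, 5);
        printf("seq index = %u\n", (unsigned)GETFIELD(SPI_STS_SEQ_INDEX, status));
        return 0;
    }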
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index c4a914b..9cf14ca 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -338,6 +338,14 @@
  */
 int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id);
 
+/**
+ * kvm_create_and_park_vcpu - Create and park a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be created and parked.
+ *
+ * @returns: 0 on success, negative errno on failure.
+ */
+int kvm_create_and_park_vcpu(CPUState *cpu);
+
 /* Arch specific hooks */
 
 extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 1691297..eaeda00 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -645,6 +645,9 @@
 #define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
 #define KVM_REG_PPC_DAWR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
 #define KVM_REG_PPC_DAWRX1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+#define KVM_REG_PPC_DEXCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6)
+#define KVM_REG_PPC_HASHKEYR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7)
+#define KVM_REG_PPC_HASHPKEYR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index a831565..f454741 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -47,9 +47,14 @@
 } QEMU_PACKED;
 
 struct PPCElfPrstatus {
-    char pad1[112];
+    char pad1[32]; /* 32 == offsetof(struct elf_prstatus, pr_pid) */
+    uint32_t pid;
+    char pad2[76]; /* 76 == offsetof(struct elf_prstatus, pr_reg) -
+                            offsetof(struct elf_prstatus, pr_ppid) */
     struct PPCUserRegStruct pr_reg;
-    char pad2[40];
+    char pad3[40]; /* 40 == sizeof(struct elf_prstatus) -
+                            offsetof(struct elf_prstatus, pr_reg) -
+                            sizeof(struct user_pt_regs) */
 } QEMU_PACKED;
 
 
@@ -96,7 +101,7 @@
     DumpState *state;
 } NoteFuncArg;
 
-static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu, int id)
 {
     int i;
     reg_t cr;
@@ -109,6 +114,7 @@
 
     prstatus = &note->contents.prstatus;
     memset(prstatus, 0, sizeof(*prstatus));
+    prstatus->pid = cpu_to_dump32(s, id);
     reg = &prstatus->pr_reg;
 
     for (i = 0; i < 32; i++) {
@@ -127,7 +133,7 @@
     reg->ccr = cpu_to_dump_reg(s, cr);
 }
 
-static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id)
 {
     int i;
     struct PPCElfFpregset  *fpregset;
@@ -146,7 +152,7 @@
     fpregset->fpscr = cpu_to_dump_reg(s, cpu->env.fpscr);
 }
 
-static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id)
 {
     int i;
     struct PPCElfVmxregset *vmxregset;
@@ -178,7 +184,7 @@
     vmxregset->vscr.u32[3] = cpu_to_dump32(s, ppc_get_vscr(&cpu->env));
 }
 
-static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id)
 {
     int i;
     struct PPCElfVsxregset *vsxregset;
@@ -195,7 +201,7 @@
     }
 }
 
-static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id)
 {
     struct PPCElfSperegset *speregset;
     Note *note = &arg->note;
@@ -211,7 +217,7 @@
 
 static const struct NoteFuncDescStruct {
     int contents_size;
-    void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu);
+    void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu, int id);
 } note_func[] = {
     {sizeof_field(Note, contents.prstatus),  ppc_write_elf_prstatus},
     {sizeof_field(Note, contents.fpregset),  ppc_write_elf_fpregset},
@@ -282,7 +288,7 @@
         arg.note.hdr.n_descsz = cpu_to_dump32(s, nf->contents_size);
         strncpy(arg.note.name, note_name, sizeof(arg.note.name));
 
-        (*nf->note_contents_func)(&arg, cpu);
+        (*nf->note_contents_func)(&arg, cpu, id);
 
         note_size =
             sizeof(arg.note) - sizeof(arg.note.contents) + nf->contents_size;
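
The pad1/pid/pad2 split above preserves the old prstatus layout: the prefix
before pr_reg is still 32 + 4 + 76 = 112 bytes, exactly the size of the old
pad1. A compile-time restatement of that arithmetic (a sketch, not part of
the patch):

    #include <stdint.h>

    _Static_assert(32 + sizeof(uint32_t) + 76 == 112,
                   "pid split must keep pr_reg at the same offset");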
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 2015e60..321ed2d 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1197,21 +1197,6 @@
 DEXCR_ASPECT(PHIE, 6)
 
 /*****************************************************************************/
-/* PowerNV ChipTOD and TimeBase State Machine */
-struct pnv_tod_tbst {
-    int tb_ready_for_tod; /* core TB ready to receive TOD from chiptod */
-    int tod_sent_to_tb;   /* chiptod sent TOD to the core TB */
-
-    /*
-     * "Timers" for async TBST events are simulated by mfTFAC because TFAC
-     * is polled for such events. These are just used to ensure firmware
-     * performs the polling at least a few times.
-     */
-    int tb_state_timer;
-    int tb_sync_pulse_timer;
-};
-
-/*****************************************************************************/
 /* The whole PowerPC CPU context */
 
 /*
@@ -1262,15 +1247,16 @@
     /* when a memory exception occurs, the access type is stored here */
     int access_type;
 
+    /* For SMT processors */
+    bool has_smt_siblings;
+    int core_index;
+
 #if !defined(CONFIG_USER_ONLY)
     /* MMU context, only relevant for full system emulation */
 #if defined(TARGET_PPC64)
     ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */
     struct CPUBreakpoint *ciabr_breakpoint;
     struct CPUWatchpoint *dawr0_watchpoint;
-
-    /* POWER CPU regs/state */
-    target_ulong scratch[8]; /* SCRATCH registers (shared across core) */
 #endif
     target_ulong sr[32];   /* segment registers */
     uint32_t nb_BATs;      /* number of BATs */
@@ -1291,12 +1277,6 @@
     uint32_t tlb_need_flush; /* Delayed flush needed */
 #define TLB_NEED_LOCAL_FLUSH   0x1
 #define TLB_NEED_GLOBAL_FLUSH  0x2
-
-#if defined(TARGET_PPC64)
-    /* PowerNV chiptod / timebase facility state. */
-    /* Would be nice to put these into PnvCore */
-    struct pnv_tod_tbst pnv_tod_tbst;
-#endif
 #endif
 
     /* Other registers */
@@ -1426,12 +1406,10 @@
     uint64_t pmu_base_time;
 };
 
-#define _CORE_ID(cs)                                            \
-    (POWERPC_CPU(cs)->env.spr_cb[SPR_PIR].default_value & ~(cs->nr_threads - 1))
-
 #define THREAD_SIBLING_FOREACH(cs, cs_sibling)                  \
     CPU_FOREACH(cs_sibling)                                     \
-        if (_CORE_ID(cs) == _CORE_ID(cs_sibling))
+        if (POWERPC_CPU(cs)->env.core_index ==                  \
+            POWERPC_CPU(cs_sibling)->env.core_index)
 
 #define SET_FIT_PERIOD(a_, b_, c_, d_)          \
 do {                                            \
@@ -1535,6 +1513,17 @@
     int  (*check_attn)(CPUPPCState *env);
 };
 
+static inline bool ppc_cpu_core_single_threaded(CPUState *cs)
+{
+    return !POWERPC_CPU(cs)->env.has_smt_siblings;
+}
+
+static inline bool ppc_cpu_lpar_single_threaded(CPUState *cs)
+{
+    return !(POWERPC_CPU(cs)->env.flags & POWERPC_FLAG_SMT_1LPAR) ||
+           ppc_cpu_core_single_threaded(cs);
+}
+
 ObjectClass *ppc_cpu_class_by_name(const char *name);
 PowerPCCPUClass *ppc_cpu_class_by_pvr(uint32_t pvr);
 PowerPCCPUClass *ppc_cpu_class_by_pvr_mask(uint32_t pvr);
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index cdada79..23881d0 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5760,16 +5760,6 @@
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, &spr_core_write_generic,
                  0x00000000);
-    spr_register_hv(env, SPR_POWER_SPRC, "SPRC",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_sprc,
-                 0x00000000);
-    spr_register_hv(env, SPR_POWER_SPRD, "SPRD",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_sprd, &spr_write_sprd,
-                 0x00000000);
 #endif
 }
 
@@ -5803,6 +5793,17 @@
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_generic,
                      KVM_REG_PPC_WORT, 0);
+    /* SPRC/SPRD exist on earlier CPUs but are only tested on POWER9/10 */
+    spr_register_hv(env, SPR_POWER_SPRC, "SPRC",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_sprc,
+                 0x00000000);
+    spr_register_hv(env, SPR_POWER_SPRD, "SPRD",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_sprd, &spr_write_sprd,
+                 0x00000000);
 #endif
 }
 
@@ -5873,22 +5874,22 @@
         ((uint64_t)g_rand_int(rand) << 32) | (uint64_t)g_rand_int(rand);
     g_rand_free(rand);
 #endif
-    spr_register(env, SPR_HASHKEYR, "HASHKEYR",
+    spr_register_kvm(env, SPR_HASHKEYR, "HASHKEYR",
             SPR_NOACCESS, SPR_NOACCESS,
             &spr_read_generic, &spr_write_generic,
-            hashkeyr_initial_value);
-    spr_register_hv(env, SPR_HASHPKEYR, "HASHPKEYR",
+            KVM_REG_PPC_HASHKEYR, hashkeyr_initial_value);
+    spr_register_kvm_hv(env, SPR_HASHPKEYR, "HASHPKEYR",
             SPR_NOACCESS, SPR_NOACCESS,
             SPR_NOACCESS, SPR_NOACCESS,
             &spr_read_generic, &spr_write_generic,
-            hashpkeyr_initial_value);
+            KVM_REG_PPC_HASHPKEYR, hashpkeyr_initial_value);
 }
 
 static void register_power10_dexcr_sprs(CPUPPCState *env)
 {
-    spr_register(env, SPR_DEXCR, "DEXCR",
+    spr_register_kvm(env, SPR_DEXCR, "DEXCR",
             SPR_NOACCESS, SPR_NOACCESS,
-            &spr_read_generic, &spr_write_generic,
+            &spr_read_generic, &spr_write_generic, KVM_REG_PPC_DEXCR,
             0);
 
     spr_register(env, SPR_UDEXCR, "UDEXCR",
@@ -6785,7 +6786,8 @@
 
     /*
      * pseries SMT means "LPAR per core" mode, e.g., msgsndp is usable
-     * between threads.
+     * between threads. powernv can be in either mode, and the difference
+     * mostly affects supervisor-visible registers and instructions.
      */
     if (env->flags & POWERPC_FLAG_SMT) {
         env->flags |= POWERPC_FLAG_SMT_1LPAR;
@@ -6975,7 +6977,7 @@
 
     pcc->parent_realize(dev, errp);
 
-    if (env_cpu(env)->nr_threads > 1) {
+    if (!ppc_cpu_core_single_threaded(cs)) {
         env->flags |= POWERPC_FLAG_SMT;
     }
 
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 0cd5426..f33fc36 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2998,6 +2998,34 @@
     return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_SUBPROC;
 }
 
+/*
+ * Send an interrupt to a thread in the same core as the one described by env.
+ */
+static void msgsnd_core_tir(CPUPPCState *env, uint32_t target_tir, int irq)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    CPUState *cs = env_cpu(env);
+
+    if (ppc_cpu_lpar_single_threaded(cs)) {
+        if (target_tir == 0) {
+            ppc_set_irq(cpu, irq, 1);
+        }
+    } else {
+        CPUState *ccs;
+
+        /* Does iothread need to be locked for walking CPU list? */
+        bql_lock();
+        THREAD_SIBLING_FOREACH(cs, ccs) {
+            PowerPCCPU *ccpu = POWERPC_CPU(ccs);
+            if (target_tir == ppc_cpu_tir(ccpu)) {
+                ppc_set_irq(ccpu, irq, 1);
+                break;
+            }
+        }
+        bql_unlock();
+    }
+}
+
 void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb)
 {
     if (!dbell_type_server(rb)) {
@@ -3018,6 +3046,13 @@
         return;
     }
 
+    /* POWER8 msgsnd is like msgsndp (targets a thread within core) */
+    if (!(env->insns_flags2 & PPC2_ISA300)) {
+        msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_HDOORBELL);
+        return;
+    }
+
+    /* POWER9 and later msgsnd is global (may target any thread) */
     cpu = ppc_get_vcpu_by_pir(pir);
     if (!cpu) {
         return;
@@ -3029,7 +3064,7 @@
         brdcast = true;
     }
 
-    if (cs->nr_threads == 1 || !brdcast) {
+    if (ppc_cpu_core_single_threaded(cs) || !brdcast) {
         ppc_set_irq(cpu, PPC_INTERRUPT_HDOORBELL, 1);
         return;
     }
@@ -3064,41 +3099,13 @@
  */
 void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb)
 {
-    CPUState *cs = env_cpu(env);
-    PowerPCCPU *cpu = env_archcpu(env);
-    CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
-    int ttir = rb & PPC_BITMASK(57, 63);
-
     helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP);
 
-    if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
-        nr_threads = 1; /* msgsndp behaves as 1-thread in LPAR-per-thread mode*/
-    }
-
-    if (!dbell_type_server(rb) || ttir >= nr_threads) {
+    if (!dbell_type_server(rb)) {
         return;
     }
 
-    if (nr_threads == 1) {
-        ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, 1);
-        return;
-    }
-
-    /* Does iothread need to be locked for walking CPU list? */
-    bql_lock();
-    THREAD_SIBLING_FOREACH(cs, ccs) {
-        PowerPCCPU *ccpu = POWERPC_CPU(ccs);
-        uint32_t thread_id = ppc_cpu_tir(ccpu);
-
-        if (ttir == thread_id) {
-            ppc_set_irq(ccpu, PPC_INTERRUPT_DOORBELL, 1);
-            bql_unlock();
-            return;
-        }
-    }
-
-    g_assert_not_reached();
+    msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_DOORBELL);
 }
 #endif /* TARGET_PPC64 */
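
In IBM bit numbering, the rb & PPC_BITMASK(57, 63) expression used by
msgsnd/msgsndp above is simply the low seven bits of rb, i.e. the target
thread ID (TIR). A quick standalone check (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    #define PPC_BIT(bit)        (0x8000000000000000ULL >> (bit))
    #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))

    int main(void)
    {
        uint64_t rb = 0x123456789abcdef0ULL;

        /* Bits 57-63 in IBM numbering are the low seven bits of the word. */
        assert((rb & PPC_BITMASK(57, 63)) == (rb & 0x7f));
        return 0;
    }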
 
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 51bce99..230466a 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -1599,14 +1599,14 @@
     do_float_check_status(env, sfifprf, GETPC());                            \
 }
 
-VSX_ADD_SUB(xsadddp, add, 1, float64, VsrD(0), 1, 0)
-VSX_ADD_SUB(xsaddsp, add, 1, float64, VsrD(0), 1, 1)
-VSX_ADD_SUB(xvadddp, add, 2, float64, VsrD(i), 0, 0)
-VSX_ADD_SUB(xvaddsp, add, 4, float32, VsrW(i), 0, 0)
-VSX_ADD_SUB(xssubdp, sub, 1, float64, VsrD(0), 1, 0)
-VSX_ADD_SUB(xssubsp, sub, 1, float64, VsrD(0), 1, 1)
-VSX_ADD_SUB(xvsubdp, sub, 2, float64, VsrD(i), 0, 0)
-VSX_ADD_SUB(xvsubsp, sub, 4, float32, VsrW(i), 0, 0)
+VSX_ADD_SUB(XSADDDP, add, 1, float64, VsrD(0), 1, 0)
+VSX_ADD_SUB(XSADDSP, add, 1, float64, VsrD(0), 1, 1)
+VSX_ADD_SUB(XVADDDP, add, 2, float64, VsrD(i), 0, 0)
+VSX_ADD_SUB(XVADDSP, add, 4, float32, VsrW(i), 0, 0)
+VSX_ADD_SUB(XSSUBDP, sub, 1, float64, VsrD(0), 1, 0)
+VSX_ADD_SUB(XSSUBSP, sub, 1, float64, VsrD(0), 1, 1)
+VSX_ADD_SUB(XVSUBDP, sub, 2, float64, VsrD(i), 0, 0)
+VSX_ADD_SUB(XVSUBSP, sub, 4, float32, VsrW(i), 0, 0)
 
 void helper_xsaddqp(CPUPPCState *env, uint32_t opcode,
                     ppc_vsr_t *xt, ppc_vsr_t *xa, ppc_vsr_t *xb)
@@ -1676,10 +1676,10 @@
     do_float_check_status(env, sfifprf, GETPC());                            \
 }
 
-VSX_MUL(xsmuldp, 1, float64, VsrD(0), 1, 0)
-VSX_MUL(xsmulsp, 1, float64, VsrD(0), 1, 1)
-VSX_MUL(xvmuldp, 2, float64, VsrD(i), 0, 0)
-VSX_MUL(xvmulsp, 4, float32, VsrW(i), 0, 0)
+VSX_MUL(XSMULDP, 1, float64, VsrD(0), 1, 0)
+VSX_MUL(XSMULSP, 1, float64, VsrD(0), 1, 1)
+VSX_MUL(XVMULDP, 2, float64, VsrD(i), 0, 0)
+VSX_MUL(XVMULSP, 4, float32, VsrW(i), 0, 0)
 
 void helper_xsmulqp(CPUPPCState *env, uint32_t opcode,
                     ppc_vsr_t *xt, ppc_vsr_t *xa, ppc_vsr_t *xb)
@@ -1750,10 +1750,10 @@
     do_float_check_status(env, sfifprf, GETPC());                             \
 }
 
-VSX_DIV(xsdivdp, 1, float64, VsrD(0), 1, 0)
-VSX_DIV(xsdivsp, 1, float64, VsrD(0), 1, 1)
-VSX_DIV(xvdivdp, 2, float64, VsrD(i), 0, 0)
-VSX_DIV(xvdivsp, 4, float32, VsrW(i), 0, 0)
+VSX_DIV(XSDIVDP, 1, float64, VsrD(0), 1, 0)
+VSX_DIV(XSDIVSP, 1, float64, VsrD(0), 1, 1)
+VSX_DIV(XVDIVDP, 2, float64, VsrD(i), 0, 0)
+VSX_DIV(XVDIVSP, 4, float32, VsrW(i), 0, 0)
 
 void helper_xsdivqp(CPUPPCState *env, uint32_t opcode,
                     ppc_vsr_t *xt, ppc_vsr_t *xa, ppc_vsr_t *xb)
@@ -2383,12 +2383,12 @@
     do_float_check_status(env, false, GETPC());                               \
 }
 
-VSX_MAX_MIN(xsmaxdp, maxnum, 1, float64, VsrD(0))
-VSX_MAX_MIN(xvmaxdp, maxnum, 2, float64, VsrD(i))
-VSX_MAX_MIN(xvmaxsp, maxnum, 4, float32, VsrW(i))
-VSX_MAX_MIN(xsmindp, minnum, 1, float64, VsrD(0))
-VSX_MAX_MIN(xvmindp, minnum, 2, float64, VsrD(i))
-VSX_MAX_MIN(xvminsp, minnum, 4, float32, VsrW(i))
+VSX_MAX_MIN(XSMAXDP, maxnum, 1, float64, VsrD(0))
+VSX_MAX_MIN(XVMAXDP, maxnum, 2, float64, VsrD(i))
+VSX_MAX_MIN(XVMAXSP, maxnum, 4, float32, VsrW(i))
+VSX_MAX_MIN(XSMINDP, minnum, 1, float64, VsrD(0))
+VSX_MAX_MIN(XVMINDP, minnum, 2, float64, VsrD(i))
+VSX_MAX_MIN(XVMINSP, minnum, 4, float32, VsrW(i))
 
 #define VSX_MAX_MINC(name, max, tp, fld)                                      \
 void helper_##name(CPUPPCState *env,                                          \
@@ -2527,14 +2527,14 @@
     return crf6;                                                          \
 }
 
-VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1)
-VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1)
-VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1)
-VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0)
-VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1)
-VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1)
-VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1)
-VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0)
+VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1)
+VSX_CMP(XVCMPGEDP, 2, float64, VsrD(i), le, 1, 1)
+VSX_CMP(XVCMPGTDP, 2, float64, VsrD(i), lt, 1, 1)
+VSX_CMP(XVCMPNEDP, 2, float64, VsrD(i), eq, 0, 0)
+VSX_CMP(XVCMPEQSP, 4, float32, VsrW(i), eq, 0, 1)
+VSX_CMP(XVCMPGESP, 4, float32, VsrW(i), le, 1, 1)
+VSX_CMP(XVCMPGTSP, 4, float32, VsrW(i), lt, 1, 1)
+VSX_CMP(XVCMPNESP, 4, float32, VsrW(i), eq, 0, 0)
 
 /*
  * VSX_CVT_FP_TO_FP - VSX floating point/floating point conversion
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 4fa089c..5a77e76 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -203,18 +203,18 @@
 DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(VPRTYBQ, TCG_CALL_NO_RWG, void, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vsubsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vsubshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vsubsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vaddubs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vadduhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VADDSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VADDSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VADDSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VSUBSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VSUBSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VSUBSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VADDUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VADDUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VADDUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VSUBUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VSUBUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_5(VSUBUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
 DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_4(VADDECUQ, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
 DEF_HELPER_FLAGS_4(VADDEUQM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
@@ -277,10 +277,10 @@
 DEF_HELPER_3(STVEHX, void, env, avr, tl)
 DEF_HELPER_3(STVEWX, void, env, avr, tl)
 #if defined(TARGET_PPC64)
-DEF_HELPER_4(lxvl, void, env, tl, vsr, tl)
-DEF_HELPER_4(lxvll, void, env, tl, vsr, tl)
-DEF_HELPER_4(stxvl, void, env, tl, vsr, tl)
-DEF_HELPER_4(stxvll, void, env, tl, vsr, tl)
+DEF_HELPER_4(LXVL, void, env, tl, vsr, tl)
+DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl)
+DEF_HELPER_4(STXVL, void, env, tl, vsr, tl)
+DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl)
 #endif
 DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
@@ -364,12 +364,12 @@
 DEF_HELPER_FLAGS_4(bcdtrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32)
 DEF_HELPER_FLAGS_4(bcdutrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32)
 
-DEF_HELPER_4(xsadddp, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSADDDP, void, env, vsr, vsr, vsr)
 DEF_HELPER_5(xsaddqp, void, env, i32, vsr, vsr, vsr)
-DEF_HELPER_4(xssubdp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xsmuldp, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSSUBDP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSMULDP, void, env, vsr, vsr, vsr)
 DEF_HELPER_5(xsmulqp, void, env, i32, vsr, vsr, vsr)
-DEF_HELPER_4(xsdivdp, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSDIVDP, void, env, vsr, vsr, vsr)
 DEF_HELPER_5(xsdivqp, void, env, i32, vsr, vsr, vsr)
 DEF_HELPER_3(xsredp, void, env, vsr, vsr)
 DEF_HELPER_3(xssqrtdp, void, env, vsr, vsr)
@@ -392,8 +392,8 @@
 DEF_HELPER_4(xscmpudp, void, env, i32, vsr, vsr)
 DEF_HELPER_4(xscmpoqp, void, env, i32, vsr, vsr)
 DEF_HELPER_4(xscmpuqp, void, env, i32, vsr, vsr)
-DEF_HELPER_4(xsmaxdp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xsmindp, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSMAXDP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSMINDP, void, env, vsr, vsr, vsr)
 DEF_HELPER_4(XSMAXCDP, void, env, vsr, vsr, vsr)
 DEF_HELPER_4(XSMINCDP, void, env, vsr, vsr, vsr)
 DEF_HELPER_4(XSMAXJDP, void, env, vsr, vsr, vsr)
@@ -439,10 +439,10 @@
 DEF_HELPER_4(xssqrtqp, void, env, i32, vsr, vsr)
 DEF_HELPER_5(xssubqp, void, env, i32, vsr, vsr, vsr)
 
-DEF_HELPER_4(xsaddsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xssubsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xsmulsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xsdivsp, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSADDSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSSUBSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSMULSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XSDIVSP, void, env, vsr, vsr, vsr)
 DEF_HELPER_3(xsresp, void, env, vsr, vsr)
 DEF_HELPER_2(xsrsp, i64, env, i64)
 DEF_HELPER_3(xssqrtsp, void, env, vsr, vsr)
@@ -461,10 +461,10 @@
 DEF_HELPER_5(XSNMSUBQP, void, env, vsr, vsr, vsr, vsr)
 DEF_HELPER_5(XSNMSUBQPO, void, env, vsr, vsr, vsr, vsr)
 
-DEF_HELPER_4(xvadddp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvsubdp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvmuldp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvdivdp, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVADDDP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVSUBDP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVMULDP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVDIVDP, void, env, vsr, vsr, vsr)
 DEF_HELPER_3(xvredp, void, env, vsr, vsr)
 DEF_HELPER_3(xvsqrtdp, void, env, vsr, vsr)
 DEF_HELPER_3(xvrsqrtedp, void, env, vsr, vsr)
@@ -474,12 +474,12 @@
 DEF_HELPER_5(xvmsubdp, void, env, vsr, vsr, vsr, vsr)
 DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr)
 DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr)
-DEF_HELPER_4(xvmaxdp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvmindp, void, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
 DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr)
 DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr)
 DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr)
@@ -495,10 +495,10 @@
 DEF_HELPER_3(xvrdpip, void, env, vsr, vsr)
 DEF_HELPER_3(xvrdpiz, void, env, vsr, vsr)
 
-DEF_HELPER_4(xvaddsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvsubsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvmulsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvdivsp, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVADDSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVSUBSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVMULSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVDIVSP, void, env, vsr, vsr, vsr)
 DEF_HELPER_3(xvresp, void, env, vsr, vsr)
 DEF_HELPER_3(xvsqrtsp, void, env, vsr, vsr)
 DEF_HELPER_3(xvrsqrtesp, void, env, vsr, vsr)
@@ -508,12 +508,12 @@
 DEF_HELPER_5(xvmsubsp, void, env, vsr, vsr, vsr, vsr)
 DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr)
 DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr)
-DEF_HELPER_4(xvmaxsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_4(xvminsp, void, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
-DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
+DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr)
 DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr)
 DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr)
 DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index ee33141..e53fd28 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -241,6 +241,9 @@
 &XX3            xt xa xb
 @XX3            ...... ..... ..... ..... ........ ...           &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb
 
+&XX3_rc         xt xa xb rc:bool
+@XX3_rc         ...... ..... ..... ..... rc:1 ....... ...       &XX3_rc xt=%xx_xt xa=%xx_xa xb=%xx_xb
+
 # 32 bit GER instructions have all mask bits considered 1
 &MMIRR_XX3      xa xb xt pmsk xmsk ymsk
 %xx_at          23:3
@@ -832,6 +835,14 @@
 VADDCUQ         000100 ..... ..... ..... 00101000000    @VX
 VADDUQM         000100 ..... ..... ..... 00100000000    @VX
 
+VADDSBS         000100 ..... ..... ..... 01100000000    @VX
+VADDSHS         000100 ..... ..... ..... 01101000000    @VX
+VADDSWS         000100 ..... ..... ..... 01110000000    @VX
+
+VADDUBS         000100 ..... ..... ..... 01000000000    @VX
+VADDUHS         000100 ..... ..... ..... 01001000000    @VX
+VADDUWS         000100 ..... ..... ..... 01010000000    @VX
+
 VADDEUQM        000100 ..... ..... ..... ..... 111100   @VA
 VADDECUQ        000100 ..... ..... ..... ..... 111101   @VA
 
@@ -839,6 +850,14 @@
 VSUBCUQ         000100 ..... ..... ..... 10101000000    @VX
 VSUBUQM         000100 ..... ..... ..... 10100000000    @VX
 
+VSUBSBS         000100 ..... ..... ..... 11100000000    @VX
+VSUBSHS         000100 ..... ..... ..... 11101000000    @VX
+VSUBSWS         000100 ..... ..... ..... 11110000000    @VX
+
+VSUBUBS         000100 ..... ..... ..... 11000000000    @VX
+VSUBUHS         000100 ..... ..... ..... 11001000000    @VX
+VSUBUWS         000100 ..... ..... ..... 11010000000    @VX
+
 VSUBECUQ        000100 ..... ..... ..... ..... 111111   @VA
 VSUBEUQM        000100 ..... ..... ..... ..... 111110   @VA
 
@@ -977,6 +996,35 @@
 STXVRWX         011111 ..... ..... ..... 0011001101 .   @X_TSX
 STXVRDX         011111 ..... ..... ..... 0011101101 .   @X_TSX
 
+LXSDX           011111 ..... ..... ..... 1001001100 .   @X_TSX
+LXSIWAX         011111 ..... ..... ..... 0001001100 .   @X_TSX
+LXSIBZX         011111 ..... ..... ..... 1100001101 .   @X_TSX
+LXSIHZX         011111 ..... ..... ..... 1100101101 .   @X_TSX
+LXSIWZX         011111 ..... ..... ..... 0000001100 .   @X_TSX
+LXSSPX          011111 ..... ..... ..... 1000001100 .   @X_TSX
+
+STXSDX          011111 ..... ..... ..... 1011001100 .   @X_TSX
+STXSIBX         011111 ..... ..... ..... 1110001101 .   @X_TSX
+STXSIHX         011111 ..... ..... ..... 1110101101 .   @X_TSX
+STXSIWX         011111 ..... ..... ..... 0010001100 .   @X_TSX
+STXSSPX         011111 ..... ..... ..... 1010001100 .   @X_TSX
+
+LXVB16X         011111 ..... ..... ..... 1101101100 .   @X_TSX
+LXVD2X          011111 ..... ..... ..... 1101001100 .   @X_TSX
+LXVH8X          011111 ..... ..... ..... 1100101100 .   @X_TSX
+LXVW4X          011111 ..... ..... ..... 1100001100 .   @X_TSX
+LXVDSX          011111 ..... ..... ..... 0101001100 .   @X_TSX
+LXVWSX          011111 ..... ..... ..... 0101101100 .   @X_TSX
+LXVL            011111 ..... ..... ..... 0100001101 .   @X_TSX
+LXVLL           011111 ..... ..... ..... 0100101101 .   @X_TSX
+
+STXVB16X        011111 ..... ..... ..... 1111101100 .   @X_TSX
+STXVD2X         011111 ..... ..... ..... 1111001100 .   @X_TSX
+STXVH8X         011111 ..... ..... ..... 1110101100 .   @X_TSX
+STXVW4X         011111 ..... ..... ..... 1110001100 .   @X_TSX
+STXVL           011111 ..... ..... ..... 0110001101 .   @X_TSX
+STXVLL          011111 ..... ..... ..... 0110101101 .   @X_TSX
+
 ## VSX Vector Binary Floating-Point Sign Manipulation Instructions
 
 XVABSDP         111100 ..... 00000 ..... 111011001 ..   @XX2
@@ -988,6 +1036,28 @@
 XVCPSGNDP       111100 ..... ..... ..... 11110000 ...   @XX3
 XVCPSGNSP       111100 ..... ..... ..... 11010000 ...   @XX3
 
+## VSX Binary Floating-Point Arithmetic Instructions
+
+XSADDSP         111100 ..... ..... ..... 00000000 ...   @XX3
+XSSUBSP         111100 ..... ..... ..... 00001000 ...   @XX3
+XSMULSP         111100 ..... ..... ..... 00010000 ...   @XX3
+XSDIVSP         111100 ..... ..... ..... 00011000 ...   @XX3
+
+XSADDDP         111100 ..... ..... ..... 00100000 ...   @XX3
+XSSUBDP         111100 ..... ..... ..... 00101000 ...   @XX3
+XSMULDP         111100 ..... ..... ..... 00110000 ...   @XX3
+XSDIVDP         111100 ..... ..... ..... 00111000 ...   @XX3
+
+XVADDSP         111100 ..... ..... ..... 01000000 ...   @XX3
+XVSUBSP         111100 ..... ..... ..... 01001000 ...   @XX3
+XVMULSP         111100 ..... ..... ..... 01010000 ...   @XX3
+XVDIVSP         111100 ..... ..... ..... 01011000 ...   @XX3
+
+XVADDDP         111100 ..... ..... ..... 01100000 ...   @XX3
+XVSUBDP         111100 ..... ..... ..... 01101000 ...   @XX3
+XVMULDP         111100 ..... ..... ..... 01110000 ...   @XX3
+XVDIVDP         111100 ..... ..... ..... 01111000 ...   @XX3
+
 ## VSX Scalar Multiply-Add Instructions
 
 XSMADDADP       111100 ..... ..... ..... 00100001 . . . @XX3
@@ -1057,6 +1127,23 @@
 XSCMPGEQP       111111 ..... ..... ..... 0011000100 -   @X
 XSCMPGTQP       111111 ..... ..... ..... 0011100100 -   @X
 
+XVCMPEQSP       111100 ..... ..... ..... . 1000011 ...   @XX3_rc
+XVCMPGTSP       111100 ..... ..... ..... . 1001011 ...   @XX3_rc
+XVCMPGESP       111100 ..... ..... ..... . 1010011 ...   @XX3_rc
+XVCMPNESP       111100 ..... ..... ..... . 1011011 ...   @XX3_rc
+XVCMPEQDP       111100 ..... ..... ..... . 1100011 ...   @XX3_rc
+XVCMPGTDP       111100 ..... ..... ..... . 1101011 ...   @XX3_rc
+XVCMPGEDP       111100 ..... ..... ..... . 1110011 ...   @XX3_rc
+XVCMPNEDP       111100 ..... ..... ..... . 1111011 ...   @XX3_rc
+
+XSMAXDP         111100 ..... ..... ..... 10100000 ...   @XX3
+XSMINDP         111100 ..... ..... ..... 10101000 ...   @XX3
+
+XVMAXSP         111100 ..... ..... ..... 11000000 ...   @XX3
+XVMINSP         111100 ..... ..... ..... 11001000 ...   @XX3
+XVMAXDP         111100 ..... ..... ..... 11100000 ...   @XX3
+XVMINDP         111100 ..... ..... ..... 11101000 ...   @XX3
+
 ## VSX Binary Floating-Point Convert Instructions
 
 XSCVQPDP        111111 ..... 10100 ..... 1101000100 .   @X_tb_rc
@@ -1092,6 +1179,17 @@
 XXMTACC         011111 ... -- 00001 ----- 0010110001 -   @X_a
 XXSETACCZ       011111 ... -- 00011 ----- 0010110001 -   @X_a
 
+## VSX Vector Logical instructions
+
+XXLAND          111100 ..... ..... ..... 10000010 ...   @XX3
+XXLANDC         111100 ..... ..... ..... 10001010 ...   @XX3
+XXLOR           111100 ..... ..... ..... 10010010 ...   @XX3
+XXLXOR          111100 ..... ..... ..... 10011010 ...   @XX3
+XXLNOR          111100 ..... ..... ..... 10100010 ...   @XX3
+XXLEQV          111100 ..... ..... ..... 10111010 ...   @XX3
+XXLNAND         111100 ..... ..... ..... 10110010 ...   @XX3
+XXLORC          111100 ..... ..... ..... 10101010 ...   @XX3
+
 ## VSX GER instruction
 
 XVI4GER8        111011 ... -- ..... ..... 00100011 ..-  @XX3_at xa=%xx_xa
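
For reference, the @XX3 format used by the new patterns above follows the
ISA's XX3 instruction form: primary opcode in bits 0-5, T/A/B register
fields, an 8-bit XO in bits 21-28, and the AX/BX/TX register-extension bits
at the bottom of the word. A sketch that assembles such a word (illustrative
only, not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Assemble an XX3-form word: opcode 0-5, T 6-10, A 11-15, B 16-20,
     * XO 21-28, AX 29, BX 30, TX 31 (IBM bit numbering).
     */
    static uint32_t xx3(uint32_t opcd, uint32_t xt, uint32_t xa, uint32_t xb,
                        uint32_t xo)
    {
        return (opcd << 26) | ((xt & 31) << 21) | ((xa & 31) << 16) |
               ((xb & 31) << 11) | (xo << 3) |
               ((xa >> 5) << 2) | ((xb >> 5) << 1) | (xt >> 5);
    }

    int main(void)
    {
        /* xsadddp vs0, vs1, vs2 -> primary opcode 60 (0b111100), XO 32 */
        printf("0x%08x\n", xx3(60, 0, 1, 2, 32));
        return 0;
    }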
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 2c6b633..ef4b2e7 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -541,7 +541,7 @@
     }
 
 #define VARITHSAT_DO(name, op, optype, cvt, element)                    \
-    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
+    void helper_V##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                         ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
     {                                                                   \
         int sat = 0;                                                    \
@@ -555,17 +555,17 @@
         }                                                               \
     }
 #define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
-    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
-    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
+    VARITHSAT_DO(ADDS##suffix##S, +, optype, cvt, element)      \
+    VARITHSAT_DO(SUBS##suffix##S, -, optype, cvt, element)
 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
-    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
-    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
-VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
-VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
-VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
-VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
-VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
-VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
+    VARITHSAT_DO(ADDU##suffix##S, +, optype, cvt, element)      \
+    VARITHSAT_DO(SUBU##suffix##S, -, optype, cvt, element)
+VARITHSAT_SIGNED(B, s8, int16_t, cvtshsb)
+VARITHSAT_SIGNED(H, s16, int32_t, cvtswsh)
+VARITHSAT_SIGNED(W, s32, int64_t, cvtsdsw)
+VARITHSAT_UNSIGNED(B, u8, uint16_t, cvtshub)
+VARITHSAT_UNSIGNED(H, u16, uint32_t, cvtswuh)
+VARITHSAT_UNSIGNED(W, u32, uint64_t, cvtsduw)
 #undef VARITHSAT_CASE
 #undef VARITHSAT_DO
 #undef VARITHSAT_SIGNED
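
The cvtshsb/cvtswsh/... conversions plugged into VARITHSAT_DO above saturate
a wider intermediate result to the element type and record whether saturation
occurred. A sketch of the signed-byte case under that assumption (not the
actual QEMU macro expansion):

    #include <stdint.h>

    /* Saturate a 16-bit intermediate sum to int8_t, noting saturation. */
    static int8_t cvt_s16_to_s8(int16_t x, int *sat)
    {
        if (x > INT8_MAX) {
            *sat = 1;
            return INT8_MAX;
        }
        if (x < INT8_MIN) {
            *sat = 1;
            return INT8_MIN;
        }
        return (int8_t)x;
    }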
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 2c39322..907dba6 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -48,6 +48,8 @@
 #include "qemu/mmap-alloc.h"
 #include "elf.h"
 #include "sysemu/kvm_int.h"
+#include "sysemu/kvm.h"
+#include "hw/core/accel-cpu.h"
 
 #include CONFIG_DEVICES
 
@@ -2346,6 +2348,30 @@
     }
 }
 
+static bool kvmppc_cpu_realize(CPUState *cs, Error **errp)
+{
+    int ret;
+    const char *vcpu_str = cs->parent_obj.hotplugged ? "hotplug" : "create";
+    cs->cpu_index = cpu_get_free_index();
+
+    POWERPC_CPU(cs)->vcpu_id = cs->cpu_index;
+
+    /* create and park to fail gracefully in case vcpu hotplug fails */
+    ret = kvm_create_and_park_vcpu(cs);
+    if (ret) {
+        /*
+         * This causes QEMU to terminate if initial CPU creation
+         * fails, but only fails the hotplug operation if the error
+         * happens during CPU hotplug.
+         */
+        error_setg(errp, "%s: vcpu %s failed with %d",
+                         __func__, vcpu_str, ret);
+        return false;
+    }
+    return true;
+}
+
 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
 {
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
@@ -2966,3 +2992,23 @@
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
 }
+
+static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data)
+{
+    AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
+
+    acc->cpu_target_realize = kvmppc_cpu_realize;
+}
+
+static const TypeInfo kvm_cpu_accel_type_info = {
+    .name = ACCEL_CPU_NAME("kvm"),
+
+    .parent = TYPE_ACCEL_CPU,
+    .class_init = kvm_cpu_accel_class_init,
+    .abstract = true,
+};
+static void kvm_cpu_accel_register_types(void)
+{
+    type_register_static(&kvm_cpu_accel_type_info);
+}
+type_init(kvm_cpu_accel_register_types);
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 953dd08..51b137f 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -475,8 +475,8 @@
     *xt = t;                                                            \
 }
 
-VSX_LXVL(lxvl, 0)
-VSX_LXVL(lxvll, 1)
+VSX_LXVL(LXVL, 0)
+VSX_LXVL(LXVLL, 1)
 #undef VSX_LXVL
 
 #define VSX_STXVL(name, lj)                                       \
@@ -504,8 +504,8 @@
     }                                                             \
 }
 
-VSX_STXVL(stxvl, 0)
-VSX_STXVL(stxvll, 1)
+VSX_STXVL(STXVL, 0)
+VSX_STXVL(STXVLL, 1)
 #undef VSX_STXVL
 #undef GET_NB
 #endif /* TARGET_PPC64 */
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index fa47be2..1b83971 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -48,9 +48,8 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
-    if (nr_threads == 1) {
+    if (ppc_cpu_core_single_threaded(cs)) {
         env->spr[sprn] = val;
         return;
     }
@@ -195,7 +194,7 @@
             return;
         }
 
-        if (cs->nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+        if (ppc_cpu_lpar_single_threaded(cs)) {
             env->spr[SPR_PTCR] = val;
             tlb_flush(cs);
         } else {
@@ -242,16 +241,12 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
     target_ulong dpdes = 0;
 
     helper_hfscr_facility_check(env, HFSCR_MSGP, "load DPDES", HFSCR_IC_MSGP);
 
-    if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
-        nr_threads = 1; /* DPDES behaves as 1-thread in LPAR-per-thread mode */
-    }
-
-    if (nr_threads == 1) {
+    /* DPDES behaves as 1-thread in LPAR-per-thread mode */
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) {
             dpdes = 1;
         }
@@ -278,21 +273,11 @@
     PowerPCCPU *cpu = env_archcpu(env);
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
     helper_hfscr_facility_check(env, HFSCR_MSGP, "store DPDES", HFSCR_IC_MSGP);
 
-    if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
-        nr_threads = 1; /* DPDES behaves as 1-thread in LPAR-per-thread mode */
-    }
-
-    if (val & ~(nr_threads - 1)) {
-        qemu_log_mask(LOG_GUEST_ERROR, "Invalid DPDES register value "
-                      TARGET_FMT_lx"\n", val);
-        val &= (nr_threads - 1); /* Ignore the invalid bits */
-    }
-
-    if (nr_threads == 1) {
+    /* DPDES behaves as 1-thread in LPAR-per-thread mode */
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & 0x1);
         return;
     }
@@ -308,6 +293,13 @@
     bql_unlock();
 }
 
+/*
+ * qemu-user breaks with pnv headers, so they go under ifdefs for now.
+ * A cleanup may be to move powernv-specific registers and helpers into
+ * target/ppc/pnv_helper.c.
+ */
+#include "hw/ppc/pnv_core.h"
+
 /* Indirect SCOM (SPRC/SPRD) access to SCRATCH0-7 are implemented. */
 void helper_store_sprc(CPUPPCState *env, target_ulong val)
 {
@@ -321,11 +313,35 @@
 
 target_ulong helper_load_sprd(CPUPPCState *env)
 {
+    /*
+     * SPRD is a HV-only register for Power CPUs, so this will only be
+     * accessed by powernv machines.
+     */
+    PowerPCCPU *cpu = env_archcpu(env);
+    PnvCore *pc = pnv_cpu_state(cpu)->pnv_core;
     target_ulong sprc = env->spr[SPR_POWER_SPRC];
 
-    switch (sprc & 0x3c0) {
-    case 0: /* SCRATCH0-7 */
-        return env->scratch[(sprc >> 3) & 0x7];
+    switch (sprc & 0x3e0) {
+    case 0: /* SCRATCH0-3 */
+    case 0x20: /* SCRATCH4-7 */
+        return pc->scratch[(sprc >> 3) & 0x7];
+
+    case 0x1e0: /* core thread state */
+        if (env->excp_model == POWERPC_EXCP_POWER9) {
+            /*
+             * Only implemented for POWER9 because skiboot uses it to check
+             * big-core mode. Other bits are unimplemented, so we would
+             * rather see the unimplemented-access log message on POWER10
+             * if it is used anywhere.
+             */
+            if (pc->big_core) {
+                return PPC_BIT(63);
+            } else {
+                return 0;
+            }
+        }
+        /* fall through */
+
     default:
         qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x"
                                   TARGET_FMT_lx"\n", sprc);
@@ -334,41 +350,28 @@
     return 0;
 }
 
-static void do_store_scratch(CPUPPCState *env, int nr, target_ulong val)
-{
-    CPUState *cs = env_cpu(env);
-    CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
-
-    /*
-     * Log stores to SCRATCH, because some firmware uses these for debugging
-     * and logging, but they would normally be read by the BMC, which is
-     * not implemented in QEMU yet. This gives a way to get at the information.
-     * Could also dump these upon checkstop.
-     */
-    qemu_log("SPRD write 0x" TARGET_FMT_lx " to SCRATCH%d\n", val, nr);
-
-    if (nr_threads == 1) {
-        env->scratch[nr] = val;
-        return;
-    }
-
-    THREAD_SIBLING_FOREACH(cs, ccs) {
-        CPUPPCState *cenv = &POWERPC_CPU(ccs)->env;
-        cenv->scratch[nr] = val;
-    }
-}
-
 void helper_store_sprd(CPUPPCState *env, target_ulong val)
 {
     target_ulong sprc = env->spr[SPR_POWER_SPRC];
+    PowerPCCPU *cpu = env_archcpu(env);
+    PnvCore *pc = pnv_cpu_state(cpu)->pnv_core;
+    int nr;
 
-    switch (sprc & 0x3c0) {
-    case 0: /* SCRATCH0-7 */
-        do_store_scratch(env, (sprc >> 3) & 0x7, val);
+    switch (sprc & 0x3e0) {
+    case 0: /* SCRATCH0-3 */
+    case 0x20: /* SCRATCH4-7 */
+        /*
+         * Log stores to SCRATCH, because some firmware uses these for
+         * debugging and logging, but they would normally be read by the BMC,
+         * which is not implemented in QEMU yet. This gives a way to get at the
+         * information. Could also dump these upon checkstop.
+         */
+        nr = (sprc >> 3) & 0x7;
+        qemu_log("SPRD write 0x" TARGET_FMT_lx " to SCRATCH%d\n", val, nr);
+        pc->scratch[nr] = val;
         break;
     default:
-        qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x"
+        qemu_log_mask(LOG_UNIMP, "mtSPRD: Unimplemented SPRC:0x"
                                   TARGET_FMT_lx"\n", sprc);
         break;
     }
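
The SPRC decode above groups indirect registers by sprc & 0x3e0, while bits
3-5 select one of the eight SCRATCH registers; SCRATCH0-3 therefore land in
group 0x00 and SCRATCH4-7 in group 0x20, hence the two case labels. A
standalone sketch of the decode (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        for (uint64_t sprc = 0; sprc < 0x40; sprc += 8) {
            printf("SPRC 0x%02x -> group 0x%02x, SCRATCH%u\n",
                   (unsigned)sprc, (unsigned)(sprc & 0x3e0),
                   (unsigned)((sprc >> 3) & 0x7));
        }
        return 0;
    }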
diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c
index c8f69b3..a812cb5 100644
--- a/target/ppc/mmu-book3s-v3.c
+++ b/target/ppc/mmu-book3s-v3.c
@@ -21,7 +21,6 @@
 #include "cpu.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
-#include "mmu-radix64.h"
 
 bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry)
 {
diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
index f3f7993..be66e26 100644
--- a/target/ppc/mmu-book3s-v3.h
+++ b/target/ppc/mmu-book3s-v3.h
@@ -20,9 +20,6 @@
 #ifndef PPC_MMU_BOOK3S_V3_H
 #define PPC_MMU_BOOK3S_V3_H
 
-#include "mmu-hash64.h"
-#include "mmu-books.h"
-
 #ifndef CONFIG_USER_ONLY
 
 /*
@@ -83,46 +80,6 @@
     return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR);
 }
 
-static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
-{
-    uint64_t base;
-
-    if (cpu->vhyp) {
-        return 0;
-    }
-    if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
-        ppc_v3_pate_t pate;
-
-        if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
-            return 0;
-        }
-        base = pate.dw0;
-    } else {
-        base = cpu->env.spr[SPR_SDR1];
-    }
-    return base & SDR_64_HTABORG;
-}
-
-static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
-{
-    uint64_t base;
-
-    if (cpu->vhyp) {
-        return cpu->vhyp_class->hpt_mask(cpu->vhyp);
-    }
-    if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
-        ppc_v3_pate_t pate;
-
-        if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
-            return 0;
-        }
-        base = pate.dw0;
-    } else {
-        base = cpu->env.spr[SPR_SDR1];
-    }
-    return (1ULL << ((base & SDR_64_HTABSIZE) + 18 - 7)) - 1;
-}
-
 #endif /* TARGET_PPC64 */
 
 #endif /* CONFIG_USER_ONLY */
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index d5f2057..44b1614 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -37,17 +37,6 @@
 #  define LOG_BATS(...) do { } while (0)
 #endif
 
-static int ppc_hash32_pte_prot(int mmu_idx,
-                               target_ulong sr, ppc_hash_pte32_t pte)
-{
-    unsigned pp, key;
-
-    key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
-    pp = pte.pte1 & HPTE32_R_PP;
-
-    return ppc_hash32_pp_prot(key, pp, !!(sr & SR32_NX));
-}
-
 static target_ulong hash32_bat_size(int mmu_idx,
                                     target_ulong batu, target_ulong batl)
 {
@@ -59,22 +48,6 @@
     return BATU32_BEPI & ~((batu & BATU32_BL) << 15);
 }
 
-static int hash32_bat_prot(PowerPCCPU *cpu,
-                           target_ulong batu, target_ulong batl)
-{
-    int pp, prot;
-
-    prot = 0;
-    pp = batl & BATL32_PP;
-    if (pp != 0) {
-        prot = PAGE_READ | PAGE_EXEC;
-        if (pp == 0x2) {
-            prot |= PAGE_WRITE;
-        }
-    }
-    return prot;
-}
-
 static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
                                     MMUAccessType access_type, int *prot,
                                     int mmu_idx)
@@ -106,7 +79,7 @@
         if (mask && ((ea & mask) == (batu & BATU32_BEPI))) {
             hwaddr raddr = (batl & mask) | (ea & ~mask);
 
-            *prot = hash32_bat_prot(cpu, batu, batl);
+            *prot = ppc_hash32_bat_prot(batu, batl);
 
             return raddr & TARGET_PAGE_MASK;
         }
@@ -145,7 +118,6 @@
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    int key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
 
     qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
 
@@ -206,7 +178,11 @@
         cpu_abort(cs, "ERROR: insn should not need address translation\n");
     }
 
-    *prot = key ? PAGE_READ | PAGE_WRITE : PAGE_READ;
+    if (ppc_hash32_key(mmuidx_pr(mmu_idx), sr)) {
+        *prot = PAGE_READ | PAGE_WRITE;
+    } else {
+        *prot = PAGE_READ;
+    }
     if (check_prot_access_type(*prot, access_type)) {
         *raddr = eaddr;
         return true;
@@ -225,13 +201,6 @@
     return false;
 }
 
-hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
-{
-    target_ulong mask = ppc_hash32_hpt_mask(cpu);
-
-    return (hash * HASH_PTEG_SIZE_32) & mask;
-}
-
 static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
                                      bool secondary, target_ulong ptem,
                                      ppc_hash_pte32_t *pte)
@@ -322,15 +291,6 @@
     return pte_offset;
 }
 
-static hwaddr ppc_hash32_pte_raddr(target_ulong sr, ppc_hash_pte32_t pte,
-                                   target_ulong eaddr)
-{
-    hwaddr rpn = pte.pte1 & HPTE32_R_RPN;
-    hwaddr mask = ~TARGET_PAGE_MASK;
-
-    return (rpn & ~mask) | (eaddr & mask);
-}
-
 bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
                       hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
                       bool guest_visible)
@@ -338,10 +298,10 @@
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
     target_ulong sr;
-    hwaddr pte_offset;
+    hwaddr pte_offset, raddr;
     ppc_hash_pte32_t pte;
+    bool key;
     int prot;
-    hwaddr raddr;
 
     /* There are no hash32 large pages. */
     *psizep = TARGET_PAGE_BITS;
@@ -423,8 +383,8 @@
                 "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
 
     /* 7. Check access permissions */
-
-    prot = ppc_hash32_pte_prot(mmu_idx, sr, pte);
+    key = ppc_hash32_key(mmuidx_pr(mmu_idx), sr);
+    prot = ppc_hash32_prot(key, pte.pte1 & HPTE32_R_PP, sr & SR32_NX);
 
     if (!check_prot_access_type(prot, access_type)) {
         /* Access right violation */
@@ -464,11 +424,12 @@
              */
             prot &= ~PAGE_WRITE;
         }
-     }
+    }
+    *protp = prot;
 
     /* 9. Determine the real address from the PTE */
-
-    *raddrp = ppc_hash32_pte_raddr(sr, pte, eaddr);
-    *protp = prot;
+    *raddrp = pte.pte1 & HPTE32_R_RPN;
+    *raddrp &= TARGET_PAGE_MASK;
+    *raddrp |= eaddr & ~TARGET_PAGE_MASK;
     return true;
 }
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index f0ce695..2838de0 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -3,7 +3,6 @@
 
 #ifndef CONFIG_USER_ONLY
 
-hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash);
 bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
                       hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
                       bool guest_visible);
@@ -102,48 +101,63 @@
     stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
 }
 
-static inline int ppc_hash32_pp_prot(bool key, int pp, bool nx)
+static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
+{
+    return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
+}
+
+static inline bool ppc_hash32_key(bool pr, target_ulong sr)
+{
+    return pr ? (sr & SR32_KP) : (sr & SR32_KS);
+}
+
+static inline int ppc_hash32_prot(bool key, int pp, bool nx)
 {
     int prot;
 
-    if (key == 0) {
-        switch (pp) {
-        case 0x0:
-        case 0x1:
-        case 0x2:
-            prot = PAGE_READ | PAGE_WRITE;
-            break;
-
-        case 0x3:
-            prot = PAGE_READ;
-            break;
-
-        default:
-            abort();
-        }
-    } else {
+    if (key) {
         switch (pp) {
         case 0x0:
             prot = 0;
             break;
-
         case 0x1:
         case 0x3:
             prot = PAGE_READ;
             break;
-
         case 0x2:
             prot = PAGE_READ | PAGE_WRITE;
             break;
-
         default:
-            abort();
+            g_assert_not_reached();
+        }
+    } else {
+        switch (pp) {
+        case 0x0:
+        case 0x1:
+        case 0x2:
+            prot = PAGE_READ | PAGE_WRITE;
+            break;
+        case 0x3:
+            prot = PAGE_READ;
+            break;
+        default:
+            g_assert_not_reached();
         }
     }
-    if (nx == 0) {
-        prot |= PAGE_EXEC;
-    }
+    return nx ? prot : prot | PAGE_EXEC;
+}
 
+static inline int ppc_hash32_bat_prot(target_ulong batu, target_ulong batl)
+{
+    int prot = 0;
+    int pp = batl & BATL32_PP;
+
+    if (pp) {
+        prot = PAGE_READ | PAGE_EXEC;
+        if (pp == 0x2) {
+            prot |= PAGE_WRITE;
+        }
+    }
     return prot;
 }
 
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index cbc8efa..5e1983e 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -31,6 +31,7 @@
 #include "hw/hw.h"
 #include "internal.h"
 #include "mmu-book3s-v3.h"
+#include "mmu-books.h"
 #include "helper_regs.h"
 
 #ifdef CONFIG_TCG
@@ -508,6 +509,46 @@
     return prot;
 }
 
+static hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
+{
+    uint64_t base;
+
+    if (cpu->vhyp) {
+        return 0;
+    }
+    if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
+        ppc_v3_pate_t pate;
+
+        if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
+            return 0;
+        }
+        base = pate.dw0;
+    } else {
+        base = cpu->env.spr[SPR_SDR1];
+    }
+    return base & SDR_64_HTABORG;
+}
+
+static hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
+{
+    uint64_t base;
+
+    if (cpu->vhyp) {
+        return cpu->vhyp_class->hpt_mask(cpu->vhyp);
+    }
+    if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
+        ppc_v3_pate_t pate;
+
+        if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
+            return 0;
+        }
+        base = pate.dw0;
+    } else {
+        base = cpu->env.spr[SPR_SDR1];
+    }
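+    /* HTABSIZE is log2(HPT bytes) - 18; the mask indexes 128-byte PTEGs */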
+    return (1ULL << ((base & SDR_64_HTABSIZE) + 18 - 7)) - 1;
+}
+
 const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
                                              hwaddr ptex, int n)
 {
@@ -545,6 +586,15 @@
                         false, n * HASH_PTE_SIZE_64);
 }
 
+bool ppc_hash64_valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
+{
+    /* The hash value / PTEG group index is normalized by the HPT mask */
+    if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
+        return false;
+    }
+    return true;
+}
+
 static unsigned hpte_page_shift(const PPCHash64SegmentPageSizes *sps,
                                 uint64_t pte0, uint64_t pte1)
 {
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index de653fc..ae8d4b3 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -120,6 +120,7 @@
                                              hwaddr ptex, int n);
 void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
                             hwaddr ptex, int n);
+bool ppc_hash64_valid_ptex(PowerPCCPU *cpu, target_ulong ptex);
 
 static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu,
                                         const ppc_hash_pte64_t *hptes, int i)
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 5a02e49..be7a45f 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -28,6 +28,38 @@
 #include "internal.h"
 #include "mmu-radix64.h"
 #include "mmu-book3s-v3.h"
+#include "mmu-books.h"
+
+/* Radix Partition Table Entry Fields */
+#define PATE1_R_PRTB           0x0FFFFFFFFFFFF000
+#define PATE1_R_PRTS           0x000000000000001F
+
+/* Radix Process Table Entry Fields */
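+/* The 5-bit RTS field is split in two; the tree maps 2^(RTS + 31) bytes */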
+#define PRTBE_R_GET_RTS(rts) \
+    ((((rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31)
+#define PRTBE_R_RPDB            0x0FFFFFFFFFFFFF00
+#define PRTBE_R_RPDS            0x000000000000001F
+
+/* Radix Page Directory/Table Entry Fields */
+#define R_PTE_VALID             0x8000000000000000
+#define R_PTE_LEAF              0x4000000000000000
+#define R_PTE_SW0               0x2000000000000000
+#define R_PTE_RPN               0x01FFFFFFFFFFF000
+#define R_PTE_SW1               0x0000000000000E00
+#define R_GET_SW(sw)            (((sw >> 58) & 0x8) | ((sw >> 9) & 0x7))
+#define R_PTE_R                 0x0000000000000100
+#define R_PTE_C                 0x0000000000000080
+#define R_PTE_ATT               0x0000000000000030
+#define R_PTE_ATT_NORMAL        0x0000000000000000
+#define R_PTE_ATT_SAO           0x0000000000000010
+#define R_PTE_ATT_NI_IO         0x0000000000000020
+#define R_PTE_ATT_TOLERANT_IO   0x0000000000000030
+#define R_PTE_EAA_PRIV          0x0000000000000008
+#define R_PTE_EAA_R             0x0000000000000004
+#define R_PTE_EAA_RW            0x0000000000000002
+#define R_PTE_EAA_X             0x0000000000000001
+#define R_PDE_NLB               PRTBE_R_RPDB
+#define R_PDE_NLS               PRTBE_R_RPDS
 
 static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
                                                  vaddr eaddr,
@@ -180,6 +212,24 @@
     }
 }
 
+static int ppc_radix64_get_prot_eaa(uint64_t pte)
+{
+    return (pte & R_PTE_EAA_R ? PAGE_READ : 0) |
+           (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) |
+           (pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
+}
+
+static int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu)
+{
+    const CPUPPCState *env = &cpu->env;
+    int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
+    int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */
+
+    return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */
+           (amr & 0x1 ? 0 : PAGE_READ) |
+           (iamr & 0x1 ? 0 : PAGE_EXEC);
+}
+
 static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
                                    uint64_t pte, int *fault_cause, int *prot,
                                    int mmu_idx, bool partition_scoped)
diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h
index c5c04a1..6620b3d 100644
--- a/target/ppc/mmu-radix64.h
+++ b/target/ppc/mmu-radix64.h
@@ -3,7 +3,7 @@
 
 #ifndef CONFIG_USER_ONLY
 
-#include "exec/page-protection.h"
+#ifdef TARGET_PPC64
 
 /* Radix Quadrants */
 #define R_EADDR_MASK            0x3FFFFFFFFFFFFFFF
@@ -14,61 +14,10 @@
 #define R_EADDR_QUADRANT2       0x8000000000000000
 #define R_EADDR_QUADRANT3       0xC000000000000000
 
-/* Radix Partition Table Entry Fields */
-#define PATE1_R_PRTB           0x0FFFFFFFFFFFF000
-#define PATE1_R_PRTS           0x000000000000001F
-
-/* Radix Process Table Entry Fields */
-#define PRTBE_R_GET_RTS(rts) \
-    ((((rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31)
-#define PRTBE_R_RPDB            0x0FFFFFFFFFFFFF00
-#define PRTBE_R_RPDS            0x000000000000001F
-
-/* Radix Page Directory/Table Entry Fields */
-#define R_PTE_VALID             0x8000000000000000
-#define R_PTE_LEAF              0x4000000000000000
-#define R_PTE_SW0               0x2000000000000000
-#define R_PTE_RPN               0x01FFFFFFFFFFF000
-#define R_PTE_SW1               0x0000000000000E00
-#define R_GET_SW(sw)            (((sw >> 58) & 0x8) | ((sw >> 9) & 0x7))
-#define R_PTE_R                 0x0000000000000100
-#define R_PTE_C                 0x0000000000000080
-#define R_PTE_ATT               0x0000000000000030
-#define R_PTE_ATT_NORMAL        0x0000000000000000
-#define R_PTE_ATT_SAO           0x0000000000000010
-#define R_PTE_ATT_NI_IO         0x0000000000000020
-#define R_PTE_ATT_TOLERANT_IO   0x0000000000000030
-#define R_PTE_EAA_PRIV          0x0000000000000008
-#define R_PTE_EAA_R             0x0000000000000004
-#define R_PTE_EAA_RW            0x0000000000000002
-#define R_PTE_EAA_X             0x0000000000000001
-#define R_PDE_NLB               PRTBE_R_RPDB
-#define R_PDE_NLS               PRTBE_R_RPDS
-
-#ifdef TARGET_PPC64
-
 bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
                        hwaddr *raddr, int *psizep, int *protp, int mmu_idx,
                        bool guest_visible);
 
-static inline int ppc_radix64_get_prot_eaa(uint64_t pte)
-{
-    return (pte & R_PTE_EAA_R ? PAGE_READ : 0) |
-           (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) |
-           (pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
-}
-
-static inline int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu)
-{
-    const CPUPPCState *env = &cpu->env;
-    int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
-    int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */
-
-    return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */
-           (amr & 0x1 ? 0 : PAGE_READ) |
-           (iamr & 0x1 ? 0 : PAGE_EXEC);
-}
-
 #endif /* TARGET_PPC64 */
 
 #endif /* CONFIG_USER_ONLY */
diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index e254269..60f8736 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -37,17 +37,6 @@
 
 /* #define DUMP_PAGE_TABLES */
 
-/* Context used internally during MMU translations */
-typedef struct {
-    hwaddr raddr;      /* Real address             */
-    hwaddr eaddr;      /* Effective address        */
-    int prot;          /* Protection bits          */
-    hwaddr hash[2];    /* Pagetable hash values    */
-    target_ulong ptem; /* Virtual segment ID | API */
-    int key;           /* Access key               */
-    int nx;            /* Non-execute area         */
-} mmu_ctx_t;
-
 void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
 {
     PowerPCCPU *cpu = env_archcpu(env);
@@ -94,86 +83,23 @@
     return nr;
 }
 
-static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0,
-                                target_ulong pte1, int h,
-                                MMUAccessType access_type)
-{
-    target_ulong ptem, mmask;
-    int ret, pteh, ptev, pp;
-
-    ret = -1;
-    /* Check validity and table match */
-    ptev = pte_is_valid(pte0);
-    pteh = (pte0 >> 6) & 1;
-    if (ptev && h == pteh) {
-        /* Check vsid & api */
-        ptem = pte0 & PTE_PTEM_MASK;
-        mmask = PTE_CHECK_MASK;
-        pp = pte1 & 0x00000003;
-        if (ptem == ctx->ptem) {
-            if (ctx->raddr != (hwaddr)-1ULL) {
-                /* all matches should have equal RPN, WIMG & PP */
-                if ((ctx->raddr & mmask) != (pte1 & mmask)) {
-                    qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
-                    return -3;
-                }
-            }
-            /* Keep the matching PTE information */
-            ctx->raddr = pte1;
-            ctx->prot = ppc_hash32_pp_prot(ctx->key, pp, ctx->nx);
-            if (check_prot_access_type(ctx->prot, access_type)) {
-                /* Access granted */
-                qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
-                ret = 0;
-            } else {
-                /* Access right violation */
-                qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
-                ret = -2;
-            }
-        }
-    }
-
-    return ret;
-}
-
-static int pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p,
-                            int ret, MMUAccessType access_type)
-{
-    int store = 0;
-
-    /* Update page flags */
-    if (!(*pte1p & 0x00000100)) {
-        /* Update accessed flag */
-        *pte1p |= 0x00000100;
-        store = 1;
-    }
-    if (!(*pte1p & 0x00000080)) {
-        if (access_type == MMU_DATA_STORE && ret == 0) {
-            /* Update changed flag */
-            *pte1p |= 0x00000080;
-            store = 1;
-        } else {
-            /* Force page fault for first write access */
-            ctx->prot &= ~PAGE_WRITE;
-        }
-    }
-
-    return store;
-}
-
 /* Software driven TLB helpers */
 
-static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx,
-                            target_ulong eaddr, MMUAccessType access_type)
+static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr *raddr, int *prot,
+                            target_ulong eaddr, MMUAccessType access_type,
+                            target_ulong ptem, bool key, bool nx)
 {
     ppc6xx_tlb_t *tlb;
-    int nr, best, way;
-    int ret;
+    target_ulong *pte1p;
+    int nr, best, way, ret;
+    bool is_code = (access_type == MMU_INST_FETCH);
 
+    /* Initialize real address with an invalid value */
+    *raddr = (hwaddr)-1ULL;
     best = -1;
     ret = -1; /* No TLB found */
     for (way = 0; way < env->nb_ways; way++) {
-        nr = ppc6xx_tlb_getnum(env, eaddr, way, access_type == MMU_INST_FETCH);
+        nr = ppc6xx_tlb_getnum(env, eaddr, way, is_code);
         tlb = &env->tlb.tlb6[nr];
         /* This test "emulates" the PTE index match for hardware TLBs */
         if ((eaddr & TARGET_PAGE_MASK) != tlb->EPN) {
@@ -191,37 +117,51 @@
                       tlb->EPN, eaddr, tlb->pte1,
                       access_type == MMU_DATA_STORE ? 'S' : 'L',
                       access_type == MMU_INST_FETCH ? 'I' : 'D');
-        switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1,
-                                     0, access_type)) {
-        case -2:
-            /* Access violation */
-            ret = -2;
-            best = nr;
-            break;
-        case -1: /* No match */
-        case -3: /* TLB inconsistency */
-        default:
-            break;
-        case 0:
-            /* access granted */
-            /*
-             * XXX: we should go on looping to check all TLBs
-             *      consistency but we can speed-up the whole thing as
-             *      the result would be undefined if TLBs are not
-             *      consistent.
-             */
+        /* Check validity and table match */
+        if (!pte_is_valid(tlb->pte0) || ((tlb->pte0 >> 6) & 1) != 0 ||
+            (tlb->pte0 & PTE_PTEM_MASK) != ptem) {
+            continue;
+        }
+        /* all matches should have equal RPN, WIMG & PP */
+        if (*raddr != (hwaddr)-1ULL &&
+            (*raddr & PTE_CHECK_MASK) != (tlb->pte1 & PTE_CHECK_MASK)) {
+            qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
+            /* TLB inconsistency */
+            continue;
+        }
+        /* Keep the matching PTE information */
+        best = nr;
+        *raddr = tlb->pte1;
+        *prot = ppc_hash32_prot(key, tlb->pte1 & HPTE32_R_PP, nx);
+        if (check_prot_access_type(*prot, access_type)) {
+            qemu_log_mask(CPU_LOG_MMU, "PTE access granted!\n");
             ret = 0;
-            best = nr;
-            goto done;
+            break;
+        } else {
+            qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
+            ret = -2;
         }
     }
     if (best != -1) {
-done:
         qemu_log_mask(CPU_LOG_MMU, "found TLB at addr " HWADDR_FMT_plx
                       " prot=%01x ret=%d\n",
-                      ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret);
+                      *raddr & TARGET_PAGE_MASK, *prot, ret);
         /* Update page flags */
-        pte_update_flags(ctx, &env->tlb.tlb6[best].pte1, ret, access_type);
+        pte1p = &env->tlb.tlb6[best].pte1;
+        *pte1p |= 0x00000100; /* Update accessed flag */
+        if (!(*pte1p & 0x00000080)) {
+            if (access_type == MMU_DATA_STORE && ret == 0) {
+                /* Update changed flag */
+                *pte1p |= 0x00000080;
+            } else {
+                /* Force page fault for first write access */
+                *prot &= ~PAGE_WRITE;
+            }
+        }
+    }
+    if (ret == -1) {
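+        /* No match: seed [ID]CMP for the software TLB miss handler */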
+        int r = is_code ? SPR_ICMP : SPR_DCMP;
+        env->spr[r] = ptem;
     }
 #if defined(DUMP_PAGE_TABLES)
     if (qemu_loglevel_mask(CPU_LOG_MMU)) {
@@ -247,44 +187,17 @@
     return ret;
 }
 
-/* Perform BAT hit & translation */
-static inline void bat_size_prot(CPUPPCState *env, target_ulong *blp,
-                                 int *validp, int *protp, target_ulong *BATu,
-                                 target_ulong *BATl)
-{
-    target_ulong bl;
-    int pp, valid, prot;
-
-    bl = (*BATu & 0x00001FFC) << 15;
-    valid = 0;
-    prot = 0;
-    if ((!FIELD_EX64(env->msr, MSR, PR) && (*BATu & 0x00000002)) ||
-        (FIELD_EX64(env->msr, MSR, PR) && (*BATu & 0x00000001))) {
-        valid = 1;
-        pp = *BATl & 0x00000003;
-        if (pp != 0) {
-            prot = PAGE_READ | PAGE_EXEC;
-            if (pp == 0x2) {
-                prot |= PAGE_WRITE;
-            }
-        }
-    }
-    *blp = bl;
-    *validp = valid;
-    *protp = prot;
-}
-
-static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
-                           target_ulong virtual, MMUAccessType access_type)
+static int get_bat_6xx_tlb(CPUPPCState *env, hwaddr *raddr, int *prot,
+                           target_ulong eaddr, MMUAccessType access_type,
+                           bool pr)
 {
     target_ulong *BATlt, *BATut, *BATu, *BATl;
     target_ulong BEPIl, BEPIu, bl;
-    int i, valid, prot;
-    int ret = -1;
+    int i, ret = -1;
     bool ifetch = access_type == MMU_INST_FETCH;
 
     qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__,
-                  ifetch ? 'I' : 'D', virtual);
+                  ifetch ? 'I' : 'D', eaddr);
     if (ifetch) {
         BATlt = env->IBAT[1];
         BATut = env->IBAT[0];
@@ -295,27 +208,26 @@
     for (i = 0; i < env->nb_BATs; i++) {
         BATu = &BATut[i];
         BATl = &BATlt[i];
-        BEPIu = *BATu & 0xF0000000;
-        BEPIl = *BATu & 0x0FFE0000;
-        bat_size_prot(env, &bl, &valid, &prot, BATu, BATl);
+        BEPIu = *BATu & BATU32_BEPIU;
+        BEPIl = *BATu & BATU32_BEPIL;
         qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu "
                       TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__,
-                      ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl);
-        if ((virtual & 0xF0000000) == BEPIu &&
-            ((virtual & 0x0FFE0000) & ~bl) == BEPIl) {
-            /* BAT matches */
-            if (valid != 0) {
+                      ifetch ? 'I' : 'D', i, eaddr, *BATu, *BATl);
+        bl = (*BATu & BATU32_BL) << 15;
+        if ((!pr && (*BATu & BATU32_VS)) || (pr && (*BATu & BATU32_VP))) {
+            if ((eaddr & BATU32_BEPIU) == BEPIu &&
+                ((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) {
                 /* Get physical address */
-                ctx->raddr = (*BATl & 0xF0000000) |
-                    ((virtual & 0x0FFE0000 & bl) | (*BATl & 0x0FFE0000)) |
-                    (virtual & 0x0001F000);
+                *raddr = (*BATl & BATU32_BEPIU) |
+                    ((eaddr & BATU32_BEPIL & bl) | (*BATl & BATU32_BEPIL)) |
+                    (eaddr & 0x0001F000);
                 /* Compute access rights */
-                ctx->prot = prot;
-                if (check_prot_access_type(ctx->prot, access_type)) {
+                *prot = ppc_hash32_bat_prot(*BATu, *BATl);
+                if (check_prot_access_type(*prot, access_type)) {
                     qemu_log_mask(CPU_LOG_MMU, "BAT %d match: r " HWADDR_FMT_plx
-                                  " prot=%c%c\n", i, ctx->raddr,
-                                  ctx->prot & PAGE_READ ? 'R' : '-',
-                                  ctx->prot & PAGE_WRITE ? 'W' : '-');
+                                  " prot=%c%c\n", i, *raddr,
+                                  *prot & PAGE_READ ? 'R' : '-',
+                                  *prot & PAGE_WRITE ? 'W' : '-');
                     ret = 0;
                 } else {
                     ret = -2;
@@ -327,18 +239,18 @@
     if (ret < 0) {
         if (qemu_log_enabled()) {
             qemu_log_mask(CPU_LOG_MMU, "no BAT match for "
-                          TARGET_FMT_lx ":\n", virtual);
+                          TARGET_FMT_lx ":\n", eaddr);
             for (i = 0; i < 4; i++) {
                 BATu = &BATut[i];
                 BATl = &BATlt[i];
-                BEPIu = *BATu & 0xF0000000;
-                BEPIl = *BATu & 0x0FFE0000;
-                bl = (*BATu & 0x00001FFC) << 15;
+                BEPIu = *BATu & BATU32_BEPIU;
+                BEPIl = *BATu & BATU32_BEPIL;
+                bl = (*BATu & BATU32_BL) << 15;
                 qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx
                               " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx
                               "\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " "
                               TARGET_FMT_lx "\n", __func__, ifetch ? 'I' : 'D',
-                              i, virtual, *BATu, *BATl, BEPIu, BEPIl, bl);
+                              i, eaddr, *BATu, *BATl, BEPIu, BEPIl, bl);
             }
         }
     }
@@ -346,32 +258,30 @@
     return ret;
 }
 
-static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
-                                       target_ulong eaddr,
+static int mmu6xx_get_physical_address(CPUPPCState *env, hwaddr *raddr,
+                                       int *prot, target_ulong eaddr,
+                                       hwaddr *hashp, bool *keyp,
                                        MMUAccessType access_type, int type)
 {
     PowerPCCPU *cpu = env_archcpu(env);
     hwaddr hash;
-    target_ulong vsid, sr, pgidx;
-    int ds, target_page_bits;
-    bool pr;
+    target_ulong vsid, sr, pgidx, ptem;
+    bool key, ds, nx;
+    bool pr = FIELD_EX64(env->msr, MSR, PR);
 
     /* First try to find a BAT entry if there are any */
-    if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) {
+    if (env->nb_BATs &&
+        get_bat_6xx_tlb(env, raddr, prot, eaddr, access_type, pr) == 0) {
         return 0;
     }
 
     /* Perform segment based translation when no BATs matched */
-    pr = FIELD_EX64(env->msr, MSR, PR);
-    ctx->eaddr = eaddr;
-
     sr = env->sr[eaddr >> 28];
-    ctx->key = (((sr & 0x20000000) && pr) ||
-                ((sr & 0x40000000) && !pr)) ? 1 : 0;
-    ds = sr & 0x80000000 ? 1 : 0;
-    ctx->nx = sr & 0x10000000 ? 1 : 0;
-    vsid = sr & 0x00FFFFFF;
-    target_page_bits = TARGET_PAGE_BITS;
+    key = ppc_hash32_key(pr, sr);
+    *keyp = key;
+    ds = sr & SR32_T;
+    nx = sr & SR32_NX;
+    vsid = sr & SR32_VSID;
     qemu_log_mask(CPU_LOG_MMU,
                   "Check segment v=" TARGET_FMT_lx " %d " TARGET_FMT_lx
                   " nip=" TARGET_FMT_lx " lr=" TARGET_FMT_lx
@@ -380,15 +290,15 @@
                   (int)FIELD_EX64(env->msr, MSR, IR),
                   (int)FIELD_EX64(env->msr, MSR, DR), pr ? 1 : 0,
                   access_type == MMU_DATA_STORE, type);
-    pgidx = (eaddr & ~SEGMENT_MASK_256M) >> target_page_bits;
+    pgidx = (eaddr & ~SEGMENT_MASK_256M) >> TARGET_PAGE_BITS;
     hash = vsid ^ pgidx;
-    ctx->ptem = (vsid << 7) | (pgidx >> 10);
+    ptem = (vsid << 7) | (pgidx >> 10); /* Virtual segment ID | API */
 
     qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid "
-                  TARGET_FMT_lx "\n", ctx->key, ds, ctx->nx, vsid);
+                  TARGET_FMT_lx "\n", key, ds, nx, vsid);
     if (!ds) {
         /* Check if instruction fetch is allowed, if needed */
-        if (type == ACCESS_CODE && ctx->nx) {
+        if (type == ACCESS_CODE && nx) {
             qemu_log_mask(CPU_LOG_MMU, "No access allowed\n");
             return -3;
         }
@@ -396,13 +306,11 @@
         qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx " htab_mask "
                       HWADDR_FMT_plx " hash " HWADDR_FMT_plx "\n",
                       ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
-        ctx->hash[0] = hash;
-        ctx->hash[1] = ~hash;
+        *hashp = hash;
 
-        /* Initialize real address with an invalid value */
-        ctx->raddr = (hwaddr)-1ULL;
         /* Software TLB search */
-        return ppc6xx_tlb_check(env, ctx, eaddr, access_type);
+        return ppc6xx_tlb_check(env, raddr, prot, eaddr,
+                                access_type, ptem, key, nx);
     }
 
     /* Direct-store segment : absolutely *BUGGY* for now */
@@ -411,15 +319,6 @@
     case ACCESS_INT:
         /* Integer load/store : only access allowed */
         break;
-    case ACCESS_CODE:
-        /* No code fetch is allowed in direct-store areas */
-        return -4;
-    case ACCESS_FLOAT:
-        /* Floating point load/store */
-        return -4;
-    case ACCESS_RES:
-        /* lwarx, ldarx or srwcx. */
-        return -4;
     case ACCESS_CACHE:
         /*
          * dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi
@@ -427,19 +326,17 @@
          * Should make the instruction a no-op. As it already is a
          * no-op, it's quite easy :-)
          */
-        ctx->raddr = eaddr;
+        *raddr = eaddr;
         return 0;
-    case ACCESS_EXT:
-        /* eciwx or ecowx */
-        return -4;
-    default:
-        qemu_log_mask(CPU_LOG_MMU, "ERROR: instruction should not need address"
-                                   " translation\n");
+    case ACCESS_CODE: /* No code fetch is allowed in direct-store areas */
+    case ACCESS_FLOAT: /* Floating point load/store */
+    case ACCESS_RES: /* lwarx, ldarx or srwcx. */
+    case ACCESS_EXT: /* eciwx or ecowx */
         return -4;
     }
-    if ((access_type == MMU_DATA_STORE || ctx->key != 1) &&
-        (access_type == MMU_DATA_LOAD || ctx->key != 0)) {
-        ctx->raddr = eaddr;
+    if ((access_type == MMU_DATA_STORE || !key) &&
+        (access_type == MMU_DATA_LOAD || key)) {
+        *raddr = eaddr;
         return 2;
     }
     return -2;
@@ -589,9 +486,9 @@
     for (i = 0; i < env->nb_BATs; i++) {
         BATu = &BATut[i];
         BATl = &BATlt[i];
-        BEPIu = *BATu & 0xF0000000;
-        BEPIl = *BATu & 0x0FFE0000;
-        bl = (*BATu & 0x00001FFC) << 15;
+        BEPIu = *BATu & BATU32_BEPIU;
+        BEPIl = *BATu & BATU32_BEPIL;
+        bl = (*BATu & BATU32_BL) << 15;
         qemu_printf("%s BAT%d BATu " TARGET_FMT_lx
                     " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " "
                     TARGET_FMT_lx " " TARGET_FMT_lx "\n",
@@ -777,9 +674,9 @@
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    mmu_ctx_t ctx;
-    int type;
-    int ret;
+    hwaddr hash = 0; /* init to 0 to avoid a maybe-uninitialized warning */
+    bool key;
+    int type, ret;
 
     if (ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, protp)) {
         return true;
@@ -795,13 +692,9 @@
         type = ACCESS_INT;
     }
 
-    ctx.prot = 0;
-    ctx.hash[0] = 0;
-    ctx.hash[1] = 0;
-    ret = mmu6xx_get_physical_address(env, &ctx, eaddr, access_type, type);
+    ret = mmu6xx_get_physical_address(env, raddrp, protp, eaddr, &hash, &key,
+                                      access_type, type);
     if (ret == 0) {
-        *raddrp = ctx.raddr;
-        *protp = ctx.prot;
         *psizep = TARGET_PAGE_BITS;
         return true;
     } else if (!guest_visible) {
@@ -816,7 +709,7 @@
             cs->exception_index = POWERPC_EXCP_IFTLB;
             env->error_code = 1 << 18;
             env->spr[SPR_IMISS] = eaddr;
-            env->spr[SPR_ICMP] = 0x80000000 | ctx.ptem;
+            env->spr[SPR_ICMP] |= 0x80000000;
             goto tlb_miss;
         case -2:
             /* Access rights violation */
@@ -847,13 +740,13 @@
                 env->error_code = 0;
             }
             env->spr[SPR_DMISS] = eaddr;
-            env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem;
+            env->spr[SPR_DCMP] |= 0x80000000;
 tlb_miss:
-            env->error_code |= ctx.key << 19;
+            env->error_code |= key << 19;
             env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) +
-                                  get_pteg_offset32(cpu, ctx.hash[0]);
+                                  get_pteg_offset32(cpu, hash);
             env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) +
-                                  get_pteg_offset32(cpu, ctx.hash[1]);
+                                  get_pteg_offset32(cpu, ~hash);
             break;
         case -2:
             /* Access rights violation */
diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c
index 39d3974..7312032 100644
--- a/target/ppc/timebase_helper.c
+++ b/target/ppc/timebase_helper.c
@@ -62,9 +62,8 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
-    if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         cpu_ppc_store_purr(env, val);
         return;
     }
@@ -81,9 +80,8 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
-    if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         cpu_ppc_store_tbl(env, val);
         return;
     }
@@ -98,9 +96,8 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
-    if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         cpu_ppc_store_tbu(env, val);
         return;
     }
@@ -140,9 +137,8 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
-    if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         cpu_ppc_store_hdecr(env, val);
         return;
     }
@@ -157,9 +153,8 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
-    if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         cpu_ppc_store_vtb(env, val);
         return;
     }
@@ -174,9 +169,8 @@
 {
     CPUState *cs = env_cpu(env);
     CPUState *ccs;
-    uint32_t nr_threads = cs->nr_threads;
 
-    if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+    if (ppc_cpu_lpar_single_threaded(cs)) {
         cpu_ppc_store_tbu40(env, val);
         return;
     }
@@ -217,7 +211,14 @@
     store_booke_tsr(env, val);
 }
 
-#if defined(TARGET_PPC64)
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+/*
+ * qemu-user builds break with the pnv headers, so they stay under ifdefs
+ * for now. A cleanup would be to move the powernv-specific registers and
+ * helpers into target/ppc/pnv_helper.c.
+ */
+#include "hw/ppc/pnv_core.h"
+#include "hw/ppc/pnv_chip.h"
 /*
  * POWER processor Timebase Facility
  */
@@ -287,7 +288,7 @@
 {
     CPUState *cs = env_cpu(env);
 
-    if (cs->nr_threads == 1) {
+    if (ppc_cpu_core_single_threaded(cs)) {
         env->spr[SPR_TFMR] = val;
     } else {
         CPUState *ccs;
@@ -298,8 +299,25 @@
     }
 }
 
+static PnvCoreTODState *cpu_get_tbst(PowerPCCPU *cpu)
+{
+    PnvCore *pc = pnv_cpu_state(cpu)->pnv_core;
+
+    if (pc->big_core && pc->tod_state.big_core_quirk) {
+        /* Must operate on the even small core */
+        int core_id = CPU_CORE(pc)->core_id;
+        if (core_id & 1) {
+            pc = pc->chip->cores[core_id & ~1];
+        }
+    }
+
+    return &pc->tod_state;
+}
+
 static void tb_state_machine_step(CPUPPCState *env)
 {
+    PowerPCCPU *cpu = env_archcpu(env);
+    PnvCoreTODState *tod_state = cpu_get_tbst(cpu);
     uint64_t tfmr = env->spr[SPR_TFMR];
     unsigned int tbst = tfmr_get_tb_state(tfmr);
 
@@ -307,15 +325,15 @@
         return;
     }
 
-    if (env->pnv_tod_tbst.tb_sync_pulse_timer) {
-        env->pnv_tod_tbst.tb_sync_pulse_timer--;
+    if (tod_state->tb_sync_pulse_timer) {
+        tod_state->tb_sync_pulse_timer--;
     } else {
         tfmr |= TFMR_TB_SYNC_OCCURED;
         write_tfmr(env, tfmr);
     }
 
-    if (env->pnv_tod_tbst.tb_state_timer) {
-        env->pnv_tod_tbst.tb_state_timer--;
+    if (tod_state->tb_state_timer) {
+        tod_state->tb_state_timer--;
         return;
     }
 
@@ -332,20 +350,20 @@
     } else if (tfmr & TFMR_MOVE_CHIP_TOD_TO_TB) {
         if (tbst == TBST_SYNC_WAIT) {
             tfmr = tfmr_new_tb_state(tfmr, TBST_GET_TOD);
-            env->pnv_tod_tbst.tb_state_timer = 3;
+            tod_state->tb_state_timer = 3;
         } else if (tbst == TBST_GET_TOD) {
-            if (env->pnv_tod_tbst.tod_sent_to_tb) {
+            if (tod_state->tod_sent_to_tb) {
                 tfmr = tfmr_new_tb_state(tfmr, TBST_TB_RUNNING);
                 tfmr &= ~TFMR_MOVE_CHIP_TOD_TO_TB;
-                env->pnv_tod_tbst.tb_ready_for_tod = 0;
-                env->pnv_tod_tbst.tod_sent_to_tb = 0;
+                tod_state->tb_ready_for_tod = 0;
+                tod_state->tod_sent_to_tb = 0;
             }
         } else {
             qemu_log_mask(LOG_GUEST_ERROR, "TFMR error: MOVE_CHIP_TOD_TO_TB "
                           "state machine in invalid state 0x%x\n", tbst);
             tfmr = tfmr_new_tb_state(tfmr, TBST_TB_ERROR);
             tfmr |= TFMR_FIRMWARE_CONTROL_ERROR;
-            env->pnv_tod_tbst.tb_ready_for_tod = 0;
+            tod_state->tb_ready_for_tod = 0;
         }
     }
 
@@ -361,6 +379,8 @@
 
 void helper_store_tfmr(CPUPPCState *env, target_ulong val)
 {
+    PowerPCCPU *cpu = env_archcpu(env);
+    PnvCoreTODState *tod_state = cpu_get_tbst(cpu);
     uint64_t tfmr = env->spr[SPR_TFMR];
     uint64_t clear_on_write;
     unsigned int tbst = tfmr_get_tb_state(tfmr);
@@ -384,14 +404,7 @@
      * after the second mfspr.
      */
     tfmr &= ~TFMR_TB_SYNC_OCCURED;
-    env->pnv_tod_tbst.tb_sync_pulse_timer = 1;
-
-    if (ppc_cpu_tir(env_archcpu(env)) != 0 &&
-        (val & (TFMR_LOAD_TOD_MOD | TFMR_MOVE_CHIP_TOD_TO_TB))) {
-        qemu_log_mask(LOG_UNIMP, "TFMR timebase state machine can only be "
-                                 "driven by thread 0\n");
-        goto out;
-    }
+    tod_state->tb_sync_pulse_timer = 1;
 
     if (((tfmr | val) & (TFMR_LOAD_TOD_MOD | TFMR_MOVE_CHIP_TOD_TO_TB)) ==
                         (TFMR_LOAD_TOD_MOD | TFMR_MOVE_CHIP_TOD_TO_TB)) {
@@ -399,7 +412,7 @@
                                        "MOVE_CHIP_TOD_TO_TB both set\n");
         tfmr = tfmr_new_tb_state(tfmr, TBST_TB_ERROR);
         tfmr |= TFMR_FIRMWARE_CONTROL_ERROR;
-        env->pnv_tod_tbst.tb_ready_for_tod = 0;
+        tod_state->tb_ready_for_tod = 0;
         goto out;
     }
 
@@ -413,8 +426,8 @@
         tfmr &= ~TFMR_LOAD_TOD_MOD;
         tfmr &= ~TFMR_MOVE_CHIP_TOD_TO_TB;
         tfmr &= ~TFMR_FIRMWARE_CONTROL_ERROR; /* XXX: should this be cleared? */
-        env->pnv_tod_tbst.tb_ready_for_tod = 0;
-        env->pnv_tod_tbst.tod_sent_to_tb = 0;
+        tod_state->tb_ready_for_tod = 0;
+        tod_state->tod_sent_to_tb = 0;
         goto out;
     }
 
@@ -427,19 +440,19 @@
 
     if (tfmr & TFMR_LOAD_TOD_MOD) {
         /* Wait for an arbitrary 3 mfspr until the next state transition. */
-        env->pnv_tod_tbst.tb_state_timer = 3;
+        tod_state->tb_state_timer = 3;
     } else if (tfmr & TFMR_MOVE_CHIP_TOD_TO_TB) {
         if (tbst == TBST_NOT_SET) {
             tfmr = tfmr_new_tb_state(tfmr, TBST_SYNC_WAIT);
-            env->pnv_tod_tbst.tb_ready_for_tod = 1;
-            env->pnv_tod_tbst.tb_state_timer = 3; /* arbitrary */
+            tod_state->tb_ready_for_tod = 1;
+            tod_state->tb_state_timer = 3; /* arbitrary */
         } else {
             qemu_log_mask(LOG_GUEST_ERROR, "TFMR error: MOVE_CHIP_TOD_TO_TB "
                                            "not in TB not set state 0x%x\n",
                                            tbst);
             tfmr = tfmr_new_tb_state(tfmr, TBST_TB_ERROR);
             tfmr |= TFMR_FIRMWARE_CONTROL_ERROR;
-            env->pnv_tod_tbst.tb_ready_for_tod = 0;
+            tod_state->tb_ready_for_tod = 0;
         }
     }
 
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index cba943a..71513ba 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -2543,6 +2543,7 @@
                       (ctx->opcode & 0x03FF0000) | POWERPC_EXCP_ALIGN_LE);
 }
 
+/* EA <- {(ra == 0) ? 0 : GPR[ra]} + displ */
 static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ)
 {
     TCGv ea = tcg_temp_new();
@@ -2557,6 +2558,22 @@
     return ea;
 }
 
+#if defined(TARGET_PPC64)
+/* EA <- (ra == 0) ? 0 : GPR[ra] */
+static TCGv do_ea_calc_ra(DisasContext *ctx, int ra)
+{
+    TCGv EA = tcg_temp_new();
+    if (!ra) {
+        tcg_gen_movi_tl(EA, 0);
+    } else if (NARROW_MODE(ctx)) {
+        tcg_gen_ext32u_tl(EA, cpu_gpr[ra]);
+    } else {
+        tcg_gen_mov_tl(EA, cpu_gpr[ra]);
+    }
+    return EA;
+}
+#endif
+
 /***                             Integer load                              ***/
 #define DEF_MEMOP(op) ((op) | ctx->default_tcg_memop_mask)
 #define BSWAP_MEMOP(op) ((op) | (ctx->default_tcg_memop_mask ^ MO_BSWAP))
@@ -5541,16 +5558,6 @@
     tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false));
 }
 
-static inline void get_avr64(TCGv_i64 dst, int regno, bool high)
-{
-    tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high));
-}
-
-static inline void set_avr64(int regno, TCGv_i64 src, bool high)
-{
-    tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high));
-}
-
 /*
  * Helpers for decodetree used by !function for decoding arguments.
  */
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 8084af7..70d0ad2 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -14,25 +14,39 @@
     return r;
 }
 
+static inline void get_avr64(TCGv_i64 dst, int regno, bool high)
+{
+    tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high));
+}
+
+static inline void set_avr64(int regno, TCGv_i64 src, bool high)
+{
+    tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high));
+}
+
+static inline void get_avr_full(TCGv_i128 dst, int regno)
+{
+    tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno));
+}
+
+static inline void set_avr_full(int regno, TCGv_i128 src)
+{
+    tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno));
+}
+
 static bool trans_LVX(DisasContext *ctx, arg_X *a)
 {
     TCGv EA;
-    TCGv_i64 avr;
+    TCGv_i128 avr;
     REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
     REQUIRE_VECTOR(ctx);
     gen_set_access_type(ctx, ACCESS_INT);
-    avr = tcg_temp_new_i64();
+    avr = tcg_temp_new_i128();
     EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     tcg_gen_andi_tl(EA, EA, ~0xf);
-    /*
-     * We only need to swap high and low halves. gen_qemu_ld64_i64
-     * does necessary 64-bit byteswap already.
-     */
-    gen_qemu_ld64_i64(ctx, avr, EA);
-    set_avr64(a->rt, avr, !ctx->le_mode);
-    tcg_gen_addi_tl(EA, EA, 8);
-    gen_qemu_ld64_i64(ctx, avr, EA);
-    set_avr64(a->rt, avr, ctx->le_mode);
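+    /* One 16-byte access, atomic as two 8-byte halves when aligned */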
+    tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx,
+                         DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR));
+    set_avr_full(a->rt, avr);
     return true;
 }
 
@@ -46,22 +60,16 @@
 static bool trans_STVX(DisasContext *ctx, arg_STVX *a)
 {
     TCGv EA;
-    TCGv_i64 avr;
+    TCGv_i128 avr;
     REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
     REQUIRE_VECTOR(ctx);
     gen_set_access_type(ctx, ACCESS_INT);
-    avr = tcg_temp_new_i64();
+    avr = tcg_temp_new_i128();
     EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     tcg_gen_andi_tl(EA, EA, ~0xf);
-    /*
-     * We only need to swap high and low halves. gen_qemu_st64_i64
-     * does necessary 64-bit byteswap already.
-     */
-    get_avr64(avr, a->rt, !ctx->le_mode);
-    gen_qemu_st64_i64(ctx, avr, EA);
-    tcg_gen_addi_tl(EA, EA, 8);
-    get_avr64(avr, a->rt, ctx->le_mode);
-    gen_qemu_st64_i64(ctx, avr, EA);
+    get_avr_full(avr, a->rt);
+    tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx,
+                         DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR));
     return true;
 }
 
@@ -1047,58 +1055,6 @@
 TRANS(VRLQNM, do_vector_rotl_quad, true, false)
 TRANS(VRLQMI, do_vector_rotl_quad, false, true)
 
-#define GEN_VXFORM_SAT(NAME, VECE, NORM, SAT, OPC2, OPC3)               \
-static void glue(glue(gen_, NAME), _vec)(unsigned vece, TCGv_vec t,     \
-                                         TCGv_vec sat, TCGv_vec a,      \
-                                         TCGv_vec b)                    \
-{                                                                       \
-    TCGv_vec x = tcg_temp_new_vec_matching(t);                          \
-    glue(glue(tcg_gen_, NORM), _vec)(VECE, x, a, b);                    \
-    glue(glue(tcg_gen_, SAT), _vec)(VECE, t, a, b);                     \
-    tcg_gen_cmp_vec(TCG_COND_NE, VECE, x, x, t);                        \
-    tcg_gen_or_vec(VECE, sat, sat, x);                                  \
-}                                                                       \
-static void glue(gen_, NAME)(DisasContext *ctx)                         \
-{                                                                       \
-    static const TCGOpcode vecop_list[] = {                             \
-        glue(glue(INDEX_op_, NORM), _vec),                              \
-        glue(glue(INDEX_op_, SAT), _vec),                               \
-        INDEX_op_cmp_vec, 0                                             \
-    };                                                                  \
-    static const GVecGen4 g = {                                         \
-        .fniv = glue(glue(gen_, NAME), _vec),                           \
-        .fno = glue(gen_helper_, NAME),                                 \
-        .opt_opc = vecop_list,                                          \
-        .write_aofs = true,                                             \
-        .vece = VECE,                                                   \
-    };                                                                  \
-    if (unlikely(!ctx->altivec_enabled)) {                              \
-        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
-        return;                                                         \
-    }                                                                   \
-    tcg_gen_gvec_4(avr_full_offset(rD(ctx->opcode)),                    \
-                   offsetof(CPUPPCState, vscr_sat),                     \
-                   avr_full_offset(rA(ctx->opcode)),                    \
-                   avr_full_offset(rB(ctx->opcode)),                    \
-                   16, 16, &g);                                         \
-}
-
-GEN_VXFORM_SAT(vaddubs, MO_8, add, usadd, 0, 8);
-GEN_VXFORM_DUAL_EXT(vaddubs, PPC_ALTIVEC, PPC_NONE, 0,       \
-                    vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800)
-GEN_VXFORM_SAT(vadduhs, MO_16, add, usadd, 0, 9);
-GEN_VXFORM_DUAL(vadduhs, PPC_ALTIVEC, PPC_NONE, \
-                vmul10euq, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM_SAT(vadduws, MO_32, add, usadd, 0, 10);
-GEN_VXFORM_SAT(vaddsbs, MO_8, add, ssadd, 0, 12);
-GEN_VXFORM_SAT(vaddshs, MO_16, add, ssadd, 0, 13);
-GEN_VXFORM_SAT(vaddsws, MO_32, add, ssadd, 0, 14);
-GEN_VXFORM_SAT(vsububs, MO_8, sub, ussub, 0, 24);
-GEN_VXFORM_SAT(vsubuhs, MO_16, sub, ussub, 0, 25);
-GEN_VXFORM_SAT(vsubuws, MO_32, sub, ussub, 0, 26);
-GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28);
-GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29);
-GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30);
 GEN_VXFORM_TRANS(vsl, 2, 7);
 GEN_VXFORM_TRANS(vsr, 2, 11);
 GEN_VXFORM_ENV(vpkuhum, 7, 0);
@@ -2641,26 +2597,14 @@
     }
 }
 
-
-GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \
-                xpnd04_2, PPC_NONE, PPC2_ISA300)
-
 GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \
                 bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
-GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \
-                bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
 GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \
                 bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
-GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \
-                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
-GEN_VXFORM_DUAL(vaddshs, PPC_ALTIVEC, PPC_NONE, \
-                bcdcpsgn, PPC_NONE, PPC2_ISA300)
 GEN_VXFORM_DUAL(vsubudm, PPC2_ALTIVEC_207, PPC_NONE, \
                 bcds, PPC_NONE, PPC2_ISA300)
 GEN_VXFORM_DUAL(vsubuwm, PPC_ALTIVEC, PPC_NONE, \
                 bcdus, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM_DUAL(vsubsbs, PPC_ALTIVEC, PPC_NONE, \
-                bcdtrunc, PPC_NONE, PPC2_ISA300)
 
 static void gen_vsbox(DisasContext *ctx)
 {
@@ -2937,6 +2881,180 @@
 TRANS(VSUBCUW, do_vx_vaddsubcuw, 0)
 TRANS(VADDCUW, do_vx_vaddsubcuw, 1)
 
+/* Integer Add/Sub Saturate Instructions */
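+/*
+ * Compute both the modulo and the saturating result; lanes where they
+ * differ have saturated, so accumulate those into the VSCR SAT bits.
+ */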
+static inline void do_vadd_vsub_sat(unsigned vece, TCGv_vec t, TCGv_vec qc,
+                                    TCGv_vec a, TCGv_vec b,
+                                    void (*norm_op)(unsigned, TCGv_vec,
+                                                    TCGv_vec, TCGv_vec),
+                                    void (*sat_op)(unsigned, TCGv_vec,
+                                                   TCGv_vec, TCGv_vec))
+{
+    TCGv_vec x = tcg_temp_new_vec_matching(t);
+    norm_op(vece, x, a, b);
+    sat_op(vece, t, a, b);
+    tcg_gen_xor_vec(vece, x, x, t);
+    tcg_gen_or_vec(vece, qc, qc, x);
+}
+
+static void gen_vadd_sat_u(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                           TCGv_vec a, TCGv_vec b)
+{
+    do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_add_vec, tcg_gen_usadd_vec);
+}
+
+static void gen_vadd_sat_s(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                           TCGv_vec a, TCGv_vec b)
+{
+    do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_add_vec, tcg_gen_ssadd_vec);
+}
+
+static void gen_vsub_sat_u(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                           TCGv_vec a, TCGv_vec b)
+{
+    do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_sub_vec, tcg_gen_ussub_vec);
+}
+
+static void gen_vsub_sat_s(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                           TCGv_vec a, TCGv_vec b)
+{
+    do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_sub_vec, tcg_gen_sssub_vec);
+}
+
+/*
+ * Signed/Unsigned add/sub helper ops for byte/halfword/word
+ * GVecGen4 struct variants.
+ */
+static const TCGOpcode vecop_list_sub_u[] = {
+    INDEX_op_sub_vec, INDEX_op_ussub_vec, 0
+};
+static const TCGOpcode vecop_list_sub_s[] = {
+    INDEX_op_sub_vec, INDEX_op_sssub_vec, 0
+};
+static const TCGOpcode vecop_list_add_u[] = {
+    INDEX_op_add_vec, INDEX_op_usadd_vec, 0
+};
+static const TCGOpcode vecop_list_add_s[] = {
+    INDEX_op_add_vec, INDEX_op_ssadd_vec, 0
+};
+
+static const GVecGen4 op_vsububs = {
+    .fniv = gen_vsub_sat_u,
+    .fno = gen_helper_VSUBUBS,
+    .opt_opc = vecop_list_sub_u,
+    .write_aofs = true,
+    .vece = MO_8
+};
+
+static const GVecGen4 op_vaddubs = {
+    .fniv = gen_vadd_sat_u,
+    .fno = gen_helper_VADDUBS,
+    .opt_opc = vecop_list_add_u,
+    .write_aofs = true,
+    .vece = MO_8
+};
+
+static const GVecGen4 op_vsubuhs = {
+    .fniv = gen_vsub_sat_u,
+    .fno = gen_helper_VSUBUHS,
+    .opt_opc = vecop_list_sub_u,
+    .write_aofs = true,
+    .vece = MO_16
+};
+
+static const GVecGen4 op_vadduhs = {
+    .fniv = gen_vadd_sat_u,
+    .fno = gen_helper_VADDUHS,
+    .opt_opc = vecop_list_add_u,
+    .write_aofs = true,
+    .vece = MO_16
+};
+
+static const GVecGen4 op_vsubuws = {
+    .fniv = gen_vsub_sat_u,
+    .fno = gen_helper_VSUBUWS,
+    .opt_opc = vecop_list_sub_u,
+    .write_aofs = true,
+    .vece = MO_32
+};
+
+static const GVecGen4 op_vadduws = {
+    .fniv = gen_vadd_sat_u,
+    .fno = gen_helper_VADDUWS,
+    .opt_opc = vecop_list_add_u,
+    .write_aofs = true,
+    .vece = MO_32
+};
+
+static const GVecGen4 op_vsubsbs = {
+    .fniv = gen_vsub_sat_s,
+    .fno = gen_helper_VSUBSBS,
+    .opt_opc = vecop_list_sub_s,
+    .write_aofs = true,
+    .vece = MO_8
+};
+
+static const GVecGen4 op_vaddsbs = {
+    .fniv = gen_vadd_sat_s,
+    .fno = gen_helper_VADDSBS,
+    .opt_opc = vecop_list_add_s,
+    .write_aofs = true,
+    .vece = MO_8
+};
+
+static const GVecGen4 op_vsubshs = {
+    .fniv = gen_vsub_sat_s,
+    .fno = gen_helper_VSUBSHS,
+    .opt_opc = vecop_list_sub_s,
+    .write_aofs = true,
+    .vece = MO_16
+};
+
+static const GVecGen4 op_vaddshs = {
+    .fniv = gen_vadd_sat_s,
+    .fno = gen_helper_VADDSHS,
+    .opt_opc = vecop_list_add_s,
+    .write_aofs = true,
+    .vece = MO_16
+};
+
+static const GVecGen4 op_vsubsws = {
+    .fniv = gen_vsub_sat_s,
+    .fno = gen_helper_VSUBSWS,
+    .opt_opc = vecop_list_sub_s,
+    .write_aofs = true,
+    .vece = MO_32
+};
+
+static const GVecGen4 op_vaddsws = {
+    .fniv = gen_vadd_sat_s,
+    .fno = gen_helper_VADDSWS,
+    .opt_opc = vecop_list_add_s,
+    .write_aofs = true,
+    .vece = MO_32
+};
+
+static bool do_vx_vadd_vsub_sat(DisasContext *ctx, arg_VX *a, const GVecGen4 *op)
+{
+    REQUIRE_VECTOR(ctx);
+    tcg_gen_gvec_4(avr_full_offset(a->vrt), offsetof(CPUPPCState, vscr_sat),
+                   avr_full_offset(a->vra), avr_full_offset(a->vrb),
+                   16, 16, op);
+
+    return true;
+}
+
+TRANS_FLAGS(ALTIVEC, VSUBUBS, do_vx_vadd_vsub_sat, &op_vsububs)
+TRANS_FLAGS(ALTIVEC, VSUBUHS, do_vx_vadd_vsub_sat, &op_vsubuhs)
+TRANS_FLAGS(ALTIVEC, VSUBUWS, do_vx_vadd_vsub_sat, &op_vsubuws)
+TRANS_FLAGS(ALTIVEC, VSUBSBS, do_vx_vadd_vsub_sat, &op_vsubsbs)
+TRANS_FLAGS(ALTIVEC, VSUBSHS, do_vx_vadd_vsub_sat, &op_vsubshs)
+TRANS_FLAGS(ALTIVEC, VSUBSWS, do_vx_vadd_vsub_sat, &op_vsubsws)
+TRANS_FLAGS(ALTIVEC, VADDUBS, do_vx_vadd_vsub_sat, &op_vaddubs)
+TRANS_FLAGS(ALTIVEC, VADDUHS, do_vx_vadd_vsub_sat, &op_vadduhs)
+TRANS_FLAGS(ALTIVEC, VADDUWS, do_vx_vadd_vsub_sat, &op_vadduws)
+TRANS_FLAGS(ALTIVEC, VADDSBS, do_vx_vadd_vsub_sat, &op_vaddsbs)
+TRANS_FLAGS(ALTIVEC, VADDSHS, do_vx_vadd_vsub_sat, &op_vaddshs)
+TRANS_FLAGS(ALTIVEC, VADDSWS, do_vx_vadd_vsub_sat, &op_vaddsws)
+
 static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even,
                          void (*gen_mul)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
 {
diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc
index 7bb11b0..e28958a 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -54,18 +54,13 @@
 GEN_VXFORM(xpnd04_1, 0, 22),
 GEN_VXFORM_300(bcdsr, 0, 23),
 GEN_VXFORM_300(bcdsr, 0, 31),
-GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM_DUAL(vadduhs, vmul10euq, 0, 9, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM(vadduws, 0, 10),
-GEN_VXFORM(vaddsbs, 0, 12),
-GEN_VXFORM_DUAL(vaddshs, bcdcpsgn, 0, 13, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM(vaddsws, 0, 14),
-GEN_VXFORM_DUAL(vsububs, bcdadd, 0, 24, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM(vsubuws, 0, 26),
-GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, PPC2_ISA300),
-GEN_VXFORM(vsubshs, 0, 29),
-GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_300_EXT(vmul10uq, 0, 8, 0x0000F800),
+GEN_VXFORM_300(vmul10euq, 0, 9),
+GEN_VXFORM_300(bcdcpsgn, 0, 13),
+GEN_VXFORM_207(bcdadd, 0, 24),
+GEN_VXFORM_207(bcdsub, 0, 25),
+GEN_VXFORM_300(bcdtrunc, 0, 28),
+GEN_VXFORM_300(xpnd04_2, 0, 30),
 GEN_VXFORM_300(bcdtrunc, 0, 20),
 GEN_VXFORM_300(bcdutrunc, 0, 21),
 GEN_VXFORM(vsl, 2, 7),
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index 0266f09..40a87dd 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -10,6 +10,16 @@
     tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high));
 }
 
+static inline void get_vsr_full(TCGv_i128 dst, int reg)
+{
+    tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg));
+}
+
+static inline void set_vsr_full(int reg, TCGv_i128 src)
+{
+    tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg));
+}
+
 static inline TCGv_ptr gen_vsr_ptr(int reg)
 {
     TCGv_ptr r = tcg_temp_new_ptr();
@@ -24,66 +34,59 @@
     return r;
 }
 
-#define VSX_LOAD_SCALAR(name, operation)                      \
-static void gen_##name(DisasContext *ctx)                     \
-{                                                             \
-    TCGv EA;                                                  \
-    TCGv_i64 t0;                                              \
-    if (unlikely(!ctx->vsx_enabled)) {                        \
-        gen_exception(ctx, POWERPC_EXCP_VSXU);                \
-        return;                                               \
-    }                                                         \
-    t0 = tcg_temp_new_i64();                                  \
-    gen_set_access_type(ctx, ACCESS_INT);                     \
-    EA = tcg_temp_new();                                      \
-    gen_addr_reg_index(ctx, EA);                              \
-    gen_qemu_##operation(ctx, t0, EA);                        \
-    set_cpu_vsr(xT(ctx->opcode), t0, true);                   \
-    /* NOTE: cpu_vsrl is undefined */                         \
-}
-
-VSX_LOAD_SCALAR(lxsdx, ld64_i64)
-VSX_LOAD_SCALAR(lxsiwax, ld32s_i64)
-VSX_LOAD_SCALAR(lxsibzx, ld8u_i64)
-VSX_LOAD_SCALAR(lxsihzx, ld16u_i64)
-VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64)
-VSX_LOAD_SCALAR(lxsspx, ld32fs)
-
-static void gen_lxvd2x(DisasContext *ctx)
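+/* Common body for the indexed VSX scalar loads */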
+static bool do_lxs(DisasContext *ctx, arg_X *a,
+                   void (*op)(DisasContext *, TCGv_i64, TCGv))
 {
     TCGv EA;
     TCGv_i64 t0;
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
+    REQUIRE_VSX(ctx);
     t0 = tcg_temp_new_i64();
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    gen_qemu_ld64_i64(ctx, t0, EA);
-    set_cpu_vsr(xT(ctx->opcode), t0, true);
-    tcg_gen_addi_tl(EA, EA, 8);
-    gen_qemu_ld64_i64(ctx, t0, EA);
-    set_cpu_vsr(xT(ctx->opcode), t0, false);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
+    op(ctx, t0, EA);
+    set_cpu_vsr(a->rt, t0, true);
+    /* NOTE: cpu_vsrl is undefined */
+    return true;
 }
 
-static void gen_lxvw4x(DisasContext *ctx)
+TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64);
+TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64);
+TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64);
+TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64);
+TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64);
+TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs);
+
+static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a)
 {
     TCGv EA;
-    TCGv_i64 xth;
-    TCGv_i64 xtl;
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
+    TCGv_i64 t0;
+
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, VSX);
+
+    t0 = tcg_temp_new_i64();
+    gen_set_access_type(ctx, ACCESS_INT);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
+    gen_qemu_ld64_i64(ctx, t0, EA);
+    set_cpu_vsr(a->rt, t0, true);
+    tcg_gen_addi_tl(EA, EA, 8);
+    gen_qemu_ld64_i64(ctx, t0, EA);
+    set_cpu_vsr(a->rt, t0, false);
+    return true;
+}
+
+static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a)
+{
+    TCGv EA;
+    TCGv_i64 xth, xtl;
+
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, VSX);
+
     xth = tcg_temp_new_i64();
     xtl = tcg_temp_new_i64();
-
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-
-    gen_addr_reg_index(ctx, EA);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     if (ctx->le_mode) {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
@@ -100,55 +103,45 @@
         tcg_gen_addi_tl(EA, EA, 8);
         tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ);
     }
-    set_cpu_vsr(xT(ctx->opcode), xth, true);
-    set_cpu_vsr(xT(ctx->opcode), xtl, false);
+    set_cpu_vsr(a->rt, xth, true);
+    set_cpu_vsr(a->rt, xtl, false);
+    return true;
 }
 
-static void gen_lxvwsx(DisasContext *ctx)
+static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a)
 {
     TCGv EA;
     TCGv_i32 data;
 
-    if (xT(ctx->opcode) < 32) {
-        if (unlikely(!ctx->vsx_enabled)) {
-            gen_exception(ctx, POWERPC_EXCP_VSXU);
-            return;
-        }
+    if (a->rt < 32) {
+        REQUIRE_VSX(ctx);
     } else {
-        if (unlikely(!ctx->altivec_enabled)) {
-            gen_exception(ctx, POWERPC_EXCP_VPU);
-            return;
-        }
+        REQUIRE_VECTOR(ctx);
     }
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
 
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-
-    gen_addr_reg_index(ctx, EA);
-
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     data = tcg_temp_new_i32();
     tcg_gen_qemu_ld_i32(data, EA, ctx->mem_idx, DEF_MEMOP(MO_UL));
-    tcg_gen_gvec_dup_i32(MO_UL, vsr_full_offset(xT(ctx->opcode)), 16, 16, data);
+    tcg_gen_gvec_dup_i32(MO_UL, vsr_full_offset(a->rt), 16, 16, data);
+    return true;
 }
 
-static void gen_lxvdsx(DisasContext *ctx)
+static bool trans_LXVDSX(DisasContext *ctx, arg_LXVDSX *a)
 {
     TCGv EA;
     TCGv_i64 data;
 
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, VSX);
 
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-
-    gen_addr_reg_index(ctx, EA);
-
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     data = tcg_temp_new_i64();
     tcg_gen_qemu_ld_i64(data, EA, ctx->mem_idx, DEF_MEMOP(MO_UQ));
-    tcg_gen_gvec_dup_i64(MO_UQ, vsr_full_offset(xT(ctx->opcode)), 16, 16, data);
+    tcg_gen_gvec_dup_i64(MO_UQ, vsr_full_offset(a->rt), 16, 16, data);
+    return true;
 }
 
 static void gen_bswap16x8(TCGv_i64 outh, TCGv_i64 outl,
@@ -187,145 +180,166 @@
     tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
 }
 
-static void gen_lxvh8x(DisasContext *ctx)
+static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a)
 {
     TCGv EA;
-    TCGv_i64 xth;
-    TCGv_i64 xtl;
+    TCGv_i64 xth, xtl;
 
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+
     xth = tcg_temp_new_i64();
     xtl = tcg_temp_new_i64();
     gen_set_access_type(ctx, ACCESS_INT);
-
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ);
     tcg_gen_addi_tl(EA, EA, 8);
     tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ);
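+    /* The doublewords were loaded big-endian; in guest little-endian
+     * mode byte-swap each of the eight halfwords. */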
     if (ctx->le_mode) {
         gen_bswap16x8(xth, xtl, xth, xtl);
     }
-    set_cpu_vsr(xT(ctx->opcode), xth, true);
-    set_cpu_vsr(xT(ctx->opcode), xtl, false);
+    set_cpu_vsr(a->rt, xth, true);
+    set_cpu_vsr(a->rt, xtl, false);
+    return true;
 }
 
-static void gen_lxvb16x(DisasContext *ctx)
+static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a)
 {
     TCGv EA;
-    TCGv_i64 xth;
-    TCGv_i64 xtl;
+    TCGv_i128 data;
 
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
-    xth = tcg_temp_new_i64();
-    xtl = tcg_temp_new_i64();
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+
+    data = tcg_temp_new_i128();
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ);
-    tcg_gen_addi_tl(EA, EA, 8);
-    tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ);
-    set_cpu_vsr(xT(ctx->opcode), xth, true);
-    set_cpu_vsr(xT(ctx->opcode), xtl, false);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
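+    /* Single 16-byte big-endian load; MO_ATOM_IFALIGN_PAIR lets it be
+     * treated as two 8-byte halves, each atomic when aligned. */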
+    tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx,
+                         MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR);
+    set_vsr_full(a->rt, data);
+    return true;
 }
 
-#ifdef TARGET_PPC64
-#define VSX_VECTOR_LOAD_STORE_LENGTH(name)                         \
-static void gen_##name(DisasContext *ctx)                          \
-{                                                                  \
-    TCGv EA;                                                       \
-    TCGv_ptr xt;                                                   \
-                                                                   \
-    if (xT(ctx->opcode) < 32) {                                    \
-        if (unlikely(!ctx->vsx_enabled)) {                         \
-            gen_exception(ctx, POWERPC_EXCP_VSXU);                 \
-            return;                                                \
-        }                                                          \
-    } else {                                                       \
-        if (unlikely(!ctx->altivec_enabled)) {                     \
-            gen_exception(ctx, POWERPC_EXCP_VPU);                  \
-            return;                                                \
-        }                                                          \
-    }                                                              \
-    EA = tcg_temp_new();                                           \
-    xt = gen_vsr_ptr(xT(ctx->opcode));                             \
-    gen_set_access_type(ctx, ACCESS_INT);                          \
-    gen_addr_register(ctx, EA);                                    \
-    gen_helper_##name(tcg_env, EA, xt, cpu_gpr[rB(ctx->opcode)]);  \
+#if defined(TARGET_PPC64)
+static bool do_ld_st_vl(DisasContext *ctx, arg_X *a,
+                        void (*helper)(TCGv_ptr, TCGv, TCGv_ptr, TCGv))
+{
+    TCGv EA;
+    TCGv_ptr xt;
+    if (a->rt < 32) {
+        REQUIRE_VSX(ctx);
+    } else {
+        REQUIRE_VECTOR(ctx);
+    }
+    xt = gen_vsr_ptr(a->rt);
+    gen_set_access_type(ctx, ACCESS_INT);
+    EA = do_ea_calc_ra(ctx, a->ra);
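+    /* The helper performs the variable-length access itself; the byte
+     * count is taken from the high-order byte of GPR[rb]. */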
+    helper(tcg_env, EA, xt, cpu_gpr[a->rb]);
+    return true;
 }
-
-VSX_VECTOR_LOAD_STORE_LENGTH(lxvl)
-VSX_VECTOR_LOAD_STORE_LENGTH(lxvll)
-VSX_VECTOR_LOAD_STORE_LENGTH(stxvl)
-VSX_VECTOR_LOAD_STORE_LENGTH(stxvll)
 #endif
 
-#define VSX_STORE_SCALAR(name, operation)                     \
-static void gen_##name(DisasContext *ctx)                     \
-{                                                             \
-    TCGv EA;                                                  \
-    TCGv_i64 t0;                                              \
-    if (unlikely(!ctx->vsx_enabled)) {                        \
-        gen_exception(ctx, POWERPC_EXCP_VSXU);                \
-        return;                                               \
-    }                                                         \
-    t0 = tcg_temp_new_i64();                                  \
-    gen_set_access_type(ctx, ACCESS_INT);                     \
-    EA = tcg_temp_new();                                      \
-    gen_addr_reg_index(ctx, EA);                              \
-    get_cpu_vsr(t0, xS(ctx->opcode), true);                   \
-    gen_qemu_##operation(ctx, t0, EA);                        \
+static bool trans_LXVL(DisasContext *ctx, arg_LXVL *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
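+    /*
+     * On 32-bit targets REQUIRE_64BIT raises an invalid-instruction
+     * exception, so the #else branch is dead; qemu_build_not_reached()
+     * keeps the TARGET_PPC64-only helper out of those builds.
+     */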
+#if defined(TARGET_PPC64)
+    return do_ld_st_vl(ctx, a, gen_helper_LXVL);
+#else
+    qemu_build_not_reached();
+#endif
+    return true;
 }
 
-VSX_STORE_SCALAR(stxsdx, st64_i64)
+static bool trans_LXVLL(DisasContext *ctx, arg_LXVLL *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+#if defined(TARGET_PPC64)
+    return do_ld_st_vl(ctx, a, gen_helper_LXVLL);
+#else
+    qemu_build_not_reached();
+#endif
+    return true;
+}
 
-VSX_STORE_SCALAR(stxsibx, st8_i64)
-VSX_STORE_SCALAR(stxsihx, st16_i64)
-VSX_STORE_SCALAR(stxsiwx, st32_i64)
-VSX_STORE_SCALAR(stxsspx, st32fs)
+static bool trans_STXVL(DisasContext *ctx, arg_STXVL *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+#if defined(TARGET_PPC64)
+    return do_ld_st_vl(ctx, a, gen_helper_STXVL);
+#else
+    qemu_build_not_reached();
+#endif
+    return true;
+}
 
-static void gen_stxvd2x(DisasContext *ctx)
+static bool trans_STXVLL(DisasContext *ctx, arg_STXVLL *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+#if defined(TARGET_PPC64)
+    return do_ld_st_vl(ctx, a, gen_helper_STXVLL);
+#else
+    qemu_build_not_reached();
+#endif
+    return true;
+}
+
+static bool do_stxs(DisasContext *ctx, arg_X *a,
+                    void (*op)(DisasContext *, TCGv_i64, TCGv))
 {
     TCGv EA;
     TCGv_i64 t0;
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
+    REQUIRE_VSX(ctx);
     t0 = tcg_temp_new_i64();
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    get_cpu_vsr(t0, xS(ctx->opcode), true);
-    gen_qemu_st64_i64(ctx, t0, EA);
-    tcg_gen_addi_tl(EA, EA, 8);
-    get_cpu_vsr(t0, xS(ctx->opcode), false);
-    gen_qemu_st64_i64(ctx, t0, EA);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
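+    /* Scalar stores use only the high doubleword of the source VSR. */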
+    get_cpu_vsr(t0, a->rt, true);
+    op(ctx, t0, EA);
+    return true;
 }
 
-static void gen_stxvw4x(DisasContext *ctx)
+TRANS_FLAGS2(VSX, STXSDX, do_stxs, gen_qemu_st64_i64);
+TRANS_FLAGS2(ISA300, STXSIBX, do_stxs, gen_qemu_st8_i64);
+TRANS_FLAGS2(ISA300, STXSIHX, do_stxs, gen_qemu_st16_i64);
+TRANS_FLAGS2(VSX207, STXSIWX, do_stxs, gen_qemu_st32_i64);
+TRANS_FLAGS2(VSX207, STXSSPX, do_stxs, gen_qemu_st32fs);
+
+static bool trans_STXVD2X(DisasContext *ctx, arg_STXVD2X *a)
 {
     TCGv EA;
-    TCGv_i64 xsh;
-    TCGv_i64 xsl;
+    TCGv_i64 t0;
 
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, VSX);
+
+    t0 = tcg_temp_new_i64();
+    gen_set_access_type(ctx, ACCESS_INT);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
+    get_cpu_vsr(t0, a->rt, true);
+    gen_qemu_st64_i64(ctx, t0, EA);
+    tcg_gen_addi_tl(EA, EA, 8);
+    get_cpu_vsr(t0, a->rt, false);
+    gen_qemu_st64_i64(ctx, t0, EA);
+    return true;
+}
+
+static bool trans_STXVW4X(DisasContext *ctx, arg_STXVW4X *a)
+{
+    TCGv EA;
+    TCGv_i64 xsh, xsl;
+
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, VSX);
+
     xsh = tcg_temp_new_i64();
     xsl = tcg_temp_new_i64();
-    get_cpu_vsr(xsh, xS(ctx->opcode), true);
-    get_cpu_vsr(xsl, xS(ctx->opcode), false);
+    get_cpu_vsr(xsh, a->rt, true);
+    get_cpu_vsr(xsl, a->rt, false);
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     if (ctx->le_mode) {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
@@ -342,25 +356,23 @@
         tcg_gen_addi_tl(EA, EA, 8);
         tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ);
     }
+    return true;
 }
 
-static void gen_stxvh8x(DisasContext *ctx)
+static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a)
 {
     TCGv EA;
-    TCGv_i64 xsh;
-    TCGv_i64 xsl;
+    TCGv_i64 xsh, xsl;
 
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+
     xsh = tcg_temp_new_i64();
     xsl = tcg_temp_new_i64();
-    get_cpu_vsr(xsh, xS(ctx->opcode), true);
-    get_cpu_vsr(xsl, xS(ctx->opcode), false);
+    get_cpu_vsr(xsh, a->rt, true);
+    get_cpu_vsr(xsl, a->rt, false);
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
     if (ctx->le_mode) {
         TCGv_i64 outh = tcg_temp_new_i64();
         TCGv_i64 outl = tcg_temp_new_i64();
@@ -374,28 +386,24 @@
         tcg_gen_addi_tl(EA, EA, 8);
         tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ);
     }
+    return true;
 }
 
-static void gen_stxvb16x(DisasContext *ctx)
+static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a)
 {
     TCGv EA;
-    TCGv_i64 xsh;
-    TCGv_i64 xsl;
+    TCGv_i128 data;
 
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
-    xsh = tcg_temp_new_i64();
-    xsl = tcg_temp_new_i64();
-    get_cpu_vsr(xsh, xS(ctx->opcode), true);
-    get_cpu_vsr(xsl, xS(ctx->opcode), false);
+    REQUIRE_VSX(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+
+    data = tcg_temp_new_i128();
     gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ);
-    tcg_gen_addi_tl(EA, EA, 8);
-    tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ);
+    EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]);
+    get_vsr_full(data, a->rt);
+    tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx,
+                         MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR);
+    return true;
 }
 
 static void gen_mfvsrwz(DisasContext *ctx)
@@ -788,34 +796,28 @@
 TRANS(XVCPSGNSP, do_xvcpsgn, MO_32)
 TRANS(XVCPSGNDP, do_xvcpsgn, MO_64)
 
-#define VSX_CMP(name, op1, op2, inval, type)                                  \
-static void gen_##name(DisasContext *ctx)                                     \
-{                                                                             \
-    TCGv_i32 ignored;                                                         \
-    TCGv_ptr xt, xa, xb;                                                      \
-    if (unlikely(!ctx->vsx_enabled)) {                                        \
-        gen_exception(ctx, POWERPC_EXCP_VSXU);                                \
-        return;                                                               \
-    }                                                                         \
-    xt = gen_vsr_ptr(xT(ctx->opcode));                                        \
-    xa = gen_vsr_ptr(xA(ctx->opcode));                                        \
-    xb = gen_vsr_ptr(xB(ctx->opcode));                                        \
-    if ((ctx->opcode >> (31 - 21)) & 1) {                                     \
-        gen_helper_##name(cpu_crf[6], tcg_env, xt, xa, xb);                   \
-    } else {                                                                  \
-        ignored = tcg_temp_new_i32();                                         \
-        gen_helper_##name(ignored, tcg_env, xt, xa, xb);                      \
-    }                                                                         \
+static bool do_cmp(DisasContext *ctx, arg_XX3_rc *a,
+            void (*helper)(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
+{
+    TCGv_i32 dest;
+    TCGv_ptr xt, xa, xb;
+    REQUIRE_VSX(ctx);
+    xt = gen_vsr_ptr(a->xt);
+    xa = gen_vsr_ptr(a->xa);
+    xb = gen_vsr_ptr(a->xb);
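+    /* With Rc=1 the helper writes its summary result to CR field 6;
+     * otherwise the result goes to a scratch temp and is discarded. */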
+    dest = a->rc ? cpu_crf[6] : tcg_temp_new_i32();
+    helper(dest, tcg_env, xt, xa, xb);
+    return true;
 }
 
-VSX_CMP(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX)
-VSX_CMP(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX)
-VSX_CMP(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX)
-VSX_CMP(xvcmpnedp, 0x0C, 0x0F, 0, PPC2_ISA300)
-VSX_CMP(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX)
-VSX_CMP(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX)
-VSX_CMP(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX)
-VSX_CMP(xvcmpnesp, 0x0C, 0x0B, 0, PPC2_VSX)
+TRANS_FLAGS2(VSX, XVCMPEQSP, do_cmp, gen_helper_XVCMPEQSP);
+TRANS_FLAGS2(VSX, XVCMPGTSP, do_cmp, gen_helper_XVCMPGTSP);
+TRANS_FLAGS2(VSX, XVCMPGESP, do_cmp, gen_helper_XVCMPGESP);
+TRANS_FLAGS2(ISA300, XVCMPNESP, do_cmp, gen_helper_XVCMPNESP);
+TRANS_FLAGS2(VSX, XVCMPEQDP, do_cmp, gen_helper_XVCMPEQDP);
+TRANS_FLAGS2(VSX, XVCMPGTDP, do_cmp, gen_helper_XVCMPGTDP);
+TRANS_FLAGS2(VSX, XVCMPGEDP, do_cmp, gen_helper_XVCMPGEDP);
+TRANS_FLAGS2(ISA300, XVCMPNEDP, do_cmp, gen_helper_XVCMPNEDP);
 
 static bool trans_XSCVQPDP(DisasContext *ctx, arg_X_tb_rc *a)
 {
@@ -864,20 +866,6 @@
     gen_helper_##name(tcg_env, opc);                                          \
 }
 
-#define GEN_VSX_HELPER_X3(name, op1, op2, inval, type)                        \
-static void gen_##name(DisasContext *ctx)                                     \
-{                                                                             \
-    TCGv_ptr xt, xa, xb;                                                      \
-    if (unlikely(!ctx->vsx_enabled)) {                                        \
-        gen_exception(ctx, POWERPC_EXCP_VSXU);                                \
-        return;                                                               \
-    }                                                                         \
-    xt = gen_vsr_ptr(xT(ctx->opcode));                                        \
-    xa = gen_vsr_ptr(xA(ctx->opcode));                                        \
-    xb = gen_vsr_ptr(xB(ctx->opcode));                                        \
-    gen_helper_##name(tcg_env, xt, xa, xb);                                   \
-}
-
 #define GEN_VSX_HELPER_X2(name, op1, op2, inval, type)                        \
 static void gen_##name(DisasContext *ctx)                                     \
 {                                                                             \
@@ -983,12 +971,8 @@
     set_cpu_vsr(xT(ctx->opcode), tcg_constant_i64(0), false); \
 }
 
-GEN_VSX_HELPER_X3(xsadddp, 0x00, 0x04, 0, PPC2_VSX)
 GEN_VSX_HELPER_R3(xsaddqp, 0x04, 0x00, 0, PPC2_ISA300)
-GEN_VSX_HELPER_X3(xssubdp, 0x00, 0x05, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xsmuldp, 0x00, 0x06, 0, PPC2_VSX)
 GEN_VSX_HELPER_R3(xsmulqp, 0x04, 0x01, 0, PPC2_ISA300)
-GEN_VSX_HELPER_X3(xsdivdp, 0x00, 0x07, 0, PPC2_VSX)
 GEN_VSX_HELPER_R3(xsdivqp, 0x04, 0x11, 0, PPC2_ISA300)
 GEN_VSX_HELPER_X2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
@@ -1001,8 +985,6 @@
 GEN_VSX_HELPER_X2_AB(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX)
 GEN_VSX_HELPER_R2_AB(xscmpoqp, 0x04, 0x04, 0, PPC2_VSX)
 GEN_VSX_HELPER_R2_AB(xscmpuqp, 0x04, 0x14, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xsmindp, 0x00, 0x15, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xscvdphp, 0x16, 0x15, 0x11, PPC2_ISA300)
 GEN_VSX_HELPER_X2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX)
 GEN_VSX_HELPER_R2(xscvdpqp, 0x04, 0x1A, 0x16, PPC2_ISA300)
@@ -1233,27 +1215,17 @@
 GEN_VSX_HELPER_R2(xsrqpxp, 0x05, 0x01, 0, PPC2_ISA300)
 GEN_VSX_HELPER_R2(xssqrtqp, 0x04, 0x19, 0x1B, PPC2_ISA300)
 GEN_VSX_HELPER_R3(xssubqp, 0x04, 0x10, 0, PPC2_ISA300)
-GEN_VSX_HELPER_X3(xsaddsp, 0x00, 0x00, 0, PPC2_VSX207)
-GEN_VSX_HELPER_X3(xssubsp, 0x00, 0x01, 0, PPC2_VSX207)
-GEN_VSX_HELPER_X3(xsmulsp, 0x00, 0x02, 0, PPC2_VSX207)
-GEN_VSX_HELPER_X3(xsdivsp, 0x00, 0x03, 0, PPC2_VSX207)
 GEN_VSX_HELPER_X2(xsresp, 0x14, 0x01, 0, PPC2_VSX207)
 GEN_VSX_HELPER_X2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207)
 GEN_VSX_HELPER_X2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207)
 GEN_VSX_HELPER_X2(xscvsxdsp, 0x10, 0x13, 0, PPC2_VSX207)
 GEN_VSX_HELPER_X2(xscvuxdsp, 0x10, 0x12, 0, PPC2_VSX207)
 
-GEN_VSX_HELPER_X3(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2_AB(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX)
 GEN_VSX_HELPER_X1(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvmindp, 0x00, 0x1D, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvcvdpsp, 0x12, 0x18, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvcvdpsxds, 0x10, 0x1D, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvcvdpsxws, 0x10, 0x0D, 0, PPC2_VSX)
@@ -1269,17 +1241,11 @@
 GEN_VSX_HELPER_X2(xvrdpip, 0x12, 0x0E, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvrdpiz, 0x12, 0x0D, 0, PPC2_VSX)
 
-GEN_VSX_HELPER_X3(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2_AB(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX)
 GEN_VSX_HELPER_X1(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX)
-GEN_VSX_HELPER_X3(xvminsp, 0x00, 0x19, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvcvspdp, 0x12, 0x1C, 0, PPC2_VSX)
 GEN_VSX_HELPER_X2(xvcvhpsp, 0x16, 0x1D, 0x18, PPC2_ISA300)
 GEN_VSX_HELPER_X2(xvcvsphp, 0x16, 0x1D, 0x19, PPC2_ISA300)
@@ -1609,26 +1575,24 @@
     set_cpu_vsr(xT(ctx->opcode), xtl, false);
 }
 
-#define VSX_LOGICAL(name, vece, tcg_op)                              \
-static void glue(gen_, name)(DisasContext *ctx)                      \
-    {                                                                \
-        if (unlikely(!ctx->vsx_enabled)) {                           \
-            gen_exception(ctx, POWERPC_EXCP_VSXU);                   \
-            return;                                                  \
-        }                                                            \
-        tcg_op(vece, vsr_full_offset(xT(ctx->opcode)),               \
-               vsr_full_offset(xA(ctx->opcode)),                     \
-               vsr_full_offset(xB(ctx->opcode)), 16, 16);            \
-    }
+static bool do_logical_op(DisasContext *ctx, arg_XX3 *a, unsigned vece,
+    void (*helper)(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t))
+{
+    REQUIRE_VSX(ctx);
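+    /* Expand as a host gvec operation over the full 16-byte register. */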
+    helper(vece, vsr_full_offset(a->xt),
+           vsr_full_offset(a->xa),
+           vsr_full_offset(a->xb), 16, 16);
+    return true;
+}
 
-VSX_LOGICAL(xxland, MO_64, tcg_gen_gvec_and)
-VSX_LOGICAL(xxlandc, MO_64, tcg_gen_gvec_andc)
-VSX_LOGICAL(xxlor, MO_64, tcg_gen_gvec_or)
-VSX_LOGICAL(xxlxor, MO_64, tcg_gen_gvec_xor)
-VSX_LOGICAL(xxlnor, MO_64, tcg_gen_gvec_nor)
-VSX_LOGICAL(xxleqv, MO_64, tcg_gen_gvec_eqv)
-VSX_LOGICAL(xxlnand, MO_64, tcg_gen_gvec_nand)
-VSX_LOGICAL(xxlorc, MO_64, tcg_gen_gvec_orc)
+TRANS_FLAGS2(VSX, XXLAND, do_logical_op, MO_64, tcg_gen_gvec_and);
+TRANS_FLAGS2(VSX, XXLANDC, do_logical_op, MO_64, tcg_gen_gvec_andc);
+TRANS_FLAGS2(VSX, XXLOR, do_logical_op, MO_64, tcg_gen_gvec_or);
+TRANS_FLAGS2(VSX, XXLXOR, do_logical_op, MO_64, tcg_gen_gvec_xor);
+TRANS_FLAGS2(VSX, XXLNOR, do_logical_op, MO_64, tcg_gen_gvec_nor);
+TRANS_FLAGS2(VSX207, XXLEQV, do_logical_op, MO_64, tcg_gen_gvec_eqv);
+TRANS_FLAGS2(VSX207, XXLNAND, do_logical_op, MO_64, tcg_gen_gvec_nand);
+TRANS_FLAGS2(VSX207, XXLORC, do_logical_op, MO_64, tcg_gen_gvec_orc);
 
 #define VSX_XXMRG(name, high)                               \
 static void glue(gen_, name)(DisasContext *ctx)             \
@@ -2215,13 +2179,13 @@
                      int rt, bool store, bool paired)
 {
     TCGv ea;
-    TCGv_i64 xt;
+    TCGv_i128 data;
     MemOp mop;
     int rt1, rt2;
 
-    xt = tcg_temp_new_i64();
+    data = tcg_temp_new_i128();
 
-    mop = DEF_MEMOP(MO_UQ);
+    mop = DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR);
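+    /* One 16-byte access per register (atomic per aligned 8-byte half);
+     * paired forms advance EA by 16 bytes between registers. */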
 
     gen_set_access_type(ctx, ACCESS_INT);
     ea = do_ea_calc(ctx, ra, displ);
@@ -2235,32 +2199,20 @@
     }
 
     if (store) {
-        get_cpu_vsr(xt, rt1, !ctx->le_mode);
-        tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop);
-        gen_addr_add(ctx, ea, ea, 8);
-        get_cpu_vsr(xt, rt1, ctx->le_mode);
-        tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop);
+        get_vsr_full(data, rt1);
+        tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop);
         if (paired) {
-            gen_addr_add(ctx, ea, ea, 8);
-            get_cpu_vsr(xt, rt2, !ctx->le_mode);
-            tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop);
-            gen_addr_add(ctx, ea, ea, 8);
-            get_cpu_vsr(xt, rt2, ctx->le_mode);
-            tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop);
+            gen_addr_add(ctx, ea, ea, 16);
+            get_vsr_full(data, rt2);
+            tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop);
         }
     } else {
-        tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop);
-        set_cpu_vsr(rt1, xt, !ctx->le_mode);
-        gen_addr_add(ctx, ea, ea, 8);
-        tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop);
-        set_cpu_vsr(rt1, xt, ctx->le_mode);
+        tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop);
+        set_vsr_full(rt1, data);
         if (paired) {
-            gen_addr_add(ctx, ea, ea, 8);
-            tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop);
-            set_cpu_vsr(rt2, xt, !ctx->le_mode);
-            gen_addr_add(ctx, ea, ea, 8);
-            tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop);
-            set_cpu_vsr(rt2, xt, ctx->le_mode);
+            gen_addr_add(ctx, ea, ea, 16);
+            tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop);
+            set_vsr_full(rt2, data);
         }
     }
     return true;
@@ -2712,8 +2664,6 @@
     void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
 {
     TCGv_ptr xt, xa, xb;
-
-    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
     REQUIRE_VSX(ctx);
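+    /* ISA flag checks are done by the per-instruction TRANS_FLAGS2
+     * entries; this helper is shared across ISA levels, so it only
+     * checks VSX. */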
 
     xt = gen_vsr_ptr(a->xt);
@@ -2724,13 +2674,40 @@
     return true;
 }
 
-TRANS(XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP)
-TRANS(XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP)
-TRANS(XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP)
-TRANS(XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP)
-TRANS(XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP)
-TRANS(XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP)
-TRANS(XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP)
+TRANS_FLAGS2(ISA300, XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP)
+TRANS_FLAGS2(ISA300, XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP)
+TRANS_FLAGS2(ISA300, XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP)
+TRANS_FLAGS2(ISA300, XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP)
+TRANS_FLAGS2(ISA300, XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP)
+TRANS_FLAGS2(ISA300, XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP)
+TRANS_FLAGS2(ISA300, XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP)
+
+TRANS_FLAGS2(VSX207, XSADDSP, do_helper_XX3, gen_helper_XSADDSP)
+TRANS_FLAGS2(VSX207, XSSUBSP, do_helper_XX3, gen_helper_XSSUBSP)
+TRANS_FLAGS2(VSX207, XSMULSP, do_helper_XX3, gen_helper_XSMULSP)
+TRANS_FLAGS2(VSX207, XSDIVSP, do_helper_XX3, gen_helper_XSDIVSP)
+
+TRANS_FLAGS2(VSX, XSADDDP, do_helper_XX3, gen_helper_XSADDDP)
+TRANS_FLAGS2(VSX, XSSUBDP, do_helper_XX3, gen_helper_XSSUBDP)
+TRANS_FLAGS2(VSX, XSMULDP, do_helper_XX3, gen_helper_XSMULDP)
+TRANS_FLAGS2(VSX, XSDIVDP, do_helper_XX3, gen_helper_XSDIVDP)
+
+TRANS_FLAGS2(VSX, XVADDSP, do_helper_XX3, gen_helper_XVADDSP)
+TRANS_FLAGS2(VSX, XVSUBSP, do_helper_XX3, gen_helper_XVSUBSP)
+TRANS_FLAGS2(VSX, XVMULSP, do_helper_XX3, gen_helper_XVMULSP)
+TRANS_FLAGS2(VSX, XVDIVSP, do_helper_XX3, gen_helper_XVDIVSP)
+
+TRANS_FLAGS2(VSX, XVADDDP, do_helper_XX3, gen_helper_XVADDDP)
+TRANS_FLAGS2(VSX, XVSUBDP, do_helper_XX3, gen_helper_XVSUBDP)
+TRANS_FLAGS2(VSX, XVMULDP, do_helper_XX3, gen_helper_XVMULDP)
+TRANS_FLAGS2(VSX, XVDIVDP, do_helper_XX3, gen_helper_XVDIVDP)
+
+TRANS_FLAGS2(VSX, XSMAXDP, do_helper_XX3, gen_helper_XSMAXDP)
+TRANS_FLAGS2(VSX, XSMINDP, do_helper_XX3, gen_helper_XSMINDP)
+TRANS_FLAGS2(VSX, XVMAXSP, do_helper_XX3, gen_helper_XVMAXSP)
+TRANS_FLAGS2(VSX, XVMINSP, do_helper_XX3, gen_helper_XVMINSP)
+TRANS_FLAGS2(VSX, XVMAXDP, do_helper_XX3, gen_helper_XVMAXDP)
+TRANS_FLAGS2(VSX, XVMINDP, do_helper_XX3, gen_helper_XVMINDP)
 
 static bool do_helper_X(arg_X *a,
     void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
@@ -2910,4 +2887,3 @@
 #undef GEN_XX2IFORM
 #undef GEN_XX3_RC_FORM
 #undef GEN_XX3FORM_DM
-#undef VSX_LOGICAL
diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc
index a3ba094..e553b5b 100644
--- a/target/ppc/translate/vsx-ops.c.inc
+++ b/target/ppc/translate/vsx-ops.c.inc
@@ -1,34 +1,3 @@
-GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX),
-GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207),
-GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207),
-GEN_HANDLER_E(lxsibzx, 0x1F, 0x0D, 0x18, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(lxsihzx, 0x1F, 0x0D, 0x19, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207),
-GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX),
-GEN_HANDLER_E(lxvwsx, 0x1F, 0x0C, 0x0B, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX),
-GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX),
-GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE,  PPC2_ISA300),
-GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300),
-#if defined(TARGET_PPC64)
-GEN_HANDLER_E(lxvl, 0x1F, 0x0D, 0x08, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(lxvll, 0x1F, 0x0D, 0x09, 0, PPC_NONE, PPC2_ISA300),
-#endif
-
-GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
-GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(stxsihx, 0x1F, 0xD, 0x1D, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(stxsiwx, 0x1F, 0xC, 0x04, 0, PPC_NONE, PPC2_VSX207),
-GEN_HANDLER_E(stxsspx, 0x1F, 0xC, 0x14, 0, PPC_NONE, PPC2_VSX207),
-GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX),
-GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX),
-GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE,  PPC2_ISA300),
-GEN_HANDLER_E(stxvb16x, 0x1F, 0x0C, 0x1F, 0, PPC_NONE, PPC2_ISA300),
-#if defined(TARGET_PPC64)
-GEN_HANDLER_E(stxvl, 0x1F, 0x0D, 0x0C, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(stxvll, 0x1F, 0x0D, 0x0D, 0, PPC_NONE, PPC2_ISA300),
-#endif
-
 GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(mtvsrwz, 0x1F, 0x13, 0x07, 0x0000F800, PPC_NONE, PPC2_VSX207),
@@ -74,16 +43,6 @@
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 1, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 1, PPC_NONE, fl2)
 
-#define GEN_XX3_RC_FORM(name, opc2, opc3, fl2)                          \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x00, 0, PPC_NONE, fl2), \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x00, 0, PPC_NONE, fl2), \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x00, 0, PPC_NONE, fl2), \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x00, 0, PPC_NONE, fl2), \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x10, 0, PPC_NONE, fl2), \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x10, 0, PPC_NONE, fl2), \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x10, 0, PPC_NONE, fl2), \
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x10, 0, PPC_NONE, fl2)
-
 #define GEN_XX3FORM_DM(name, opc2, opc3) \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\
 GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\
@@ -153,12 +112,8 @@
 GEN_XX2FORM_EO(xvxsigdp, 0x16, 0x1D, 0x01, PPC2_ISA300),
 GEN_XX2FORM_EO(xvxexpsp, 0x16, 0x1D, 0x08, PPC2_ISA300),
 
-GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX),
 GEN_VSX_XFORM_300(xsaddqp, 0x04, 0x00, 0x0),
-GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX),
-GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX),
 GEN_VSX_XFORM_300(xsmulqp, 0x04, 0x01, 0x0),
-GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX),
 GEN_XX2FORM(xsredp,  0x14, 0x05, PPC2_VSX),
 GEN_XX2FORM(xssqrtdp,  0x16, 0x04, PPC2_VSX),
 GEN_XX2FORM(xsrsqrtedp,  0x14, 0x04, PPC2_VSX),
@@ -170,8 +125,6 @@
 GEN_XX2IFORM(xscmpudp,  0x0C, 0x04, PPC2_VSX),
 GEN_VSX_XFORM_300(xscmpoqp, 0x04, 0x04, 0x00600001),
 GEN_VSX_XFORM_300(xscmpuqp, 0x04, 0x14, 0x00600001),
-GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX),
-GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX),
 GEN_XX2FORM_EO(xscvdphp, 0x16, 0x15, 0x11, PPC2_ISA300),
 GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX),
 GEN_XX2FORM(xscvdpspn, 0x16, 0x10, PPC2_VSX207),
@@ -191,10 +144,6 @@
 GEN_XX2FORM(xsrdpip, 0x12, 0x06, PPC2_VSX),
 GEN_XX2FORM(xsrdpiz, 0x12, 0x05, PPC2_VSX),
 
-GEN_XX3FORM(xsaddsp, 0x00, 0x00, PPC2_VSX207),
-GEN_XX3FORM(xssubsp, 0x00, 0x01, PPC2_VSX207),
-GEN_XX3FORM(xsmulsp, 0x00, 0x02, PPC2_VSX207),
-GEN_XX3FORM(xsdivsp, 0x00, 0x03, PPC2_VSX207),
 GEN_VSX_XFORM_300(xsdivqp, 0x04, 0x11, 0x0),
 GEN_XX2FORM(xsresp,  0x14, 0x01, PPC2_VSX207),
 GEN_XX2FORM(xsrsp, 0x12, 0x11, PPC2_VSX207),
@@ -203,10 +152,6 @@
 GEN_XX2FORM(xscvsxdsp, 0x10, 0x13, PPC2_VSX207),
 GEN_XX2FORM(xscvuxdsp, 0x10, 0x12, PPC2_VSX207),
 
-GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
-GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
-GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX),
-GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX),
 GEN_XX2FORM(xvredp,  0x14, 0x0D, PPC2_VSX),
 GEN_XX2FORM(xvsqrtdp,  0x16, 0x0C, PPC2_VSX),
 GEN_XX2FORM(xvrsqrtedp,  0x14, 0x0C, PPC2_VSX),
@@ -220,12 +165,6 @@
 GEN_XX3FORM_NAME(xvnmadddp, "xvnmaddmdp", 0x04, 0x1D, PPC2_VSX),
 GEN_XX3FORM_NAME(xvnmsubdp, "xvnmsubadp", 0x04, 0x1E, PPC2_VSX),
 GEN_XX3FORM_NAME(xvnmsubdp, "xvnmsubmdp", 0x04, 0x1F, PPC2_VSX),
-GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX),
-GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpnedp, 0x0C, 0x0F, PPC2_ISA300),
 GEN_XX2FORM(xvcvdpsp, 0x12, 0x18, PPC2_VSX),
 GEN_XX2FORM(xvcvdpsxds, 0x10, 0x1D, PPC2_VSX),
 GEN_XX2FORM(xvcvdpsxws, 0x10, 0x0D, PPC2_VSX),
@@ -241,10 +180,6 @@
 GEN_XX2FORM(xvrdpip, 0x12, 0x0E, PPC2_VSX),
 GEN_XX2FORM(xvrdpiz, 0x12, 0x0D, PPC2_VSX),
 
-GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
-GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
-GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX),
-GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX),
 GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
 GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
 GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX),
@@ -258,12 +193,6 @@
 GEN_XX3FORM_NAME(xvnmaddsp, "xvnmaddmsp", 0x04, 0x19, PPC2_VSX),
 GEN_XX3FORM_NAME(xvnmsubsp, "xvnmsubasp", 0x04, 0x1A, PPC2_VSX),
 GEN_XX3FORM_NAME(xvnmsubsp, "xvnmsubmsp", 0x04, 0x1B, PPC2_VSX),
-GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX),
-GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX),
-GEN_XX3_RC_FORM(xvcmpnesp, 0x0C, 0x0B, PPC2_ISA300),
 GEN_XX2FORM(xvcvspdp, 0x12, 0x1C, PPC2_VSX),
 GEN_XX2FORM(xvcvspsxds, 0x10, 0x19, PPC2_VSX),
 GEN_XX2FORM(xvcvspsxws, 0x10, 0x09, PPC2_VSX),
@@ -285,17 +214,6 @@
 GEN_XX2FORM_EO(xvcvsphp, 0x16, 0x1D, 0x19, PPC2_ISA300),
 GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300),
 
-#define VSX_LOGICAL(name, opc2, opc3, fl2) \
-GEN_XX3FORM(name, opc2, opc3, fl2)
-
-VSX_LOGICAL(xxland, 0x8, 0x10, PPC2_VSX),
-VSX_LOGICAL(xxlandc, 0x8, 0x11, PPC2_VSX),
-VSX_LOGICAL(xxlor, 0x8, 0x12, PPC2_VSX),
-VSX_LOGICAL(xxlxor, 0x8, 0x13, PPC2_VSX),
-VSX_LOGICAL(xxlnor, 0x8, 0x14, PPC2_VSX),
-VSX_LOGICAL(xxleqv, 0x8, 0x17, PPC2_VSX207),
-VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207),
-VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207),
 GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX),
 GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX),
 GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index e7ab2a4..2f0d3ef 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -171,6 +171,7 @@
   qtests_ppc + \
   (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) +               \
   (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) +                 \
+  (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-spi-seeprom-test'] : []) +           \
   (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-host-i2c-test'] : []) +              \
   (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) +                      \
   (slirp.found() ? ['pxe-test'] : []) +              \
diff --git a/tests/qtest/pnv-spi-seeprom-test.c b/tests/qtest/pnv-spi-seeprom-test.c
new file mode 100644
index 0000000..57f20af
--- /dev/null
+++ b/tests/qtest/pnv-spi-seeprom-test.c
@@ -0,0 +1,110 @@
+/*
+ * QTest testcase for PowerNV 10 SEEPROM communications
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "qemu/bswap.h"
+#include "hw/ssi/pnv_spi_regs.h"
+#include "pnv-xscom.h"
+
+#define FLASH_SIZE              (512 * 1024)
+#define SPIC2_XSCOM_BASE        0xc0040
+
+/* To transmit READ opcode and address */
+#define READ_OP_TDR_DATA        0x0300010000000000
+/*
+ * N1 shift - tx 4 bytes (transmit opcode and address)
+ * N2 shift - tx and rx 8 bytes.
+ */
+#define READ_OP_COUNTER_CONFIG  0x2040000000002b00
+/* SEQ_OP_SELECT_RESPONDER - N1 Shift - N2 Shift * 5 - SEQ_OP_STOP */
+#define READ_OP_SEQUENCER       0x1130404040404010
+
+/* To transmit WREN(Set Write Enable Latch in status0 register) opcode */
+#define WRITE_OP_WREN           0x0600000000000000
+/* To transmit WRITE opcode, address and data */
+#define WRITE_OP_TDR_DATA       0x0300010012345678
+/* N1 shift - tx 8 bytes (transmit opcode, address and data) */
+#define WRITE_OP_COUNTER_CONFIG 0x4000000000002000
+/* SEQ_OP_SELECT_RESPONDER - N1 Shift - SEQ_OP_STOP */
+#define WRITE_OP_SEQUENCER      0x1130100000000000
+
+static void pnv_spi_xscom_write(QTestState *qts, const PnvChip *chip,
+        uint32_t reg, uint64_t val)
+{
+    uint32_t pcba = SPIC2_XSCOM_BASE + reg;
+    qtest_writeq(qts, pnv_xscom_addr(chip, pcba), val);
+}
+
+static uint64_t pnv_spi_xscom_read(QTestState *qts, const PnvChip *chip,
+        uint32_t reg)
+{
+    uint32_t pcba = SPIC2_XSCOM_BASE + reg;
+    return qtest_readq(qts, pnv_xscom_addr(chip, pcba));
+}
+
+static void spi_seeprom_transaction(QTestState *qts, const PnvChip *chip)
+{
+    /* SPI transactions to SEEPROM to read from SEEPROM image */
+    pnv_spi_xscom_write(qts, chip, SPI_CTR_CFG_REG, READ_OP_COUNTER_CONFIG);
+    pnv_spi_xscom_write(qts, chip, SPI_SEQ_OP_REG, READ_OP_SEQUENCER);
+    pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, READ_OP_TDR_DATA);
+    pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, 0);
+    /* Read 5*8 bytes from SEEPROM at 0x100 */
+    uint64_t rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG);
+    g_test_message("RDR READ = 0x%" PRIx64, rdr_val);
+    rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG);
+    rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG);
+    rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG);
+    rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG);
+    g_test_message("RDR READ = 0x%" PRIx64, rdr_val);
+
+    /* SPI transactions to SEEPROM to write to SEEPROM image */
+    pnv_spi_xscom_write(qts, chip, SPI_CTR_CFG_REG, WRITE_OP_COUNTER_CONFIG);
+    /* Set Write Enable Latch bit of status0 register */
+    pnv_spi_xscom_write(qts, chip, SPI_SEQ_OP_REG, WRITE_OP_SEQUENCER);
+    pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, WRITE_OP_WREN);
+    /* write 8 bytes to SEEPROM at 0x100 */
+    pnv_spi_xscom_write(qts, chip, SPI_SEQ_OP_REG, WRITE_OP_SEQUENCER);
+    pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, WRITE_OP_TDR_DATA);
+}
+
+static void test_spi_seeprom(const void *data)
+{
+    const PnvChip *chip = data;
+    QTestState *qts = NULL;
+    g_autofree char *tmp_path = NULL;
+    int ret;
+    int fd;
+
+    /* Create a temporary raw image */
+    fd = g_file_open_tmp("qtest-seeprom-XXXXXX", &tmp_path, NULL);
+    g_assert(fd >= 0);
+    ret = ftruncate(fd, FLASH_SIZE);
+    g_assert(ret == 0);
+    close(fd);
+
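+    /* Boot a powernv10 machine with the SEEPROM image wired to SPI
+     * controller 2 (bus pnv-spi-bus.2, chip select 0). */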
+    qts = qtest_initf("-machine powernv10 -smp 2,cores=2,"
+                      "threads=1 -accel tcg,thread=single -nographic "
+                      "-blockdev node-name=pib_spic2,driver=file,"
+                      "filename=%s -device 25csm04,bus=pnv-spi-bus.2,cs=0,"
+                      "drive=pib_spic2", tmp_path);
+    spi_seeprom_transaction(qts, chip);
+    qtest_quit(qts);
+    unlink(tmp_path);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    char *tname = g_strdup_printf("pnv-xscom/spi-seeprom/%s",
+            pnv_chips[3].cpu_model);
+    qtest_add_data_func(tname, &pnv_chips[3], test_spi_seeprom);
+    g_free(tname);
+    return g_test_run();
+}
diff --git a/tests/qtest/pnv-xscom.h b/tests/qtest/pnv-xscom.h
index 6f62941..5aa1701 100644
--- a/tests/qtest/pnv-xscom.h
+++ b/tests/qtest/pnv-xscom.h
@@ -56,7 +56,7 @@
         .chip_type  = PNV_CHIP_POWER10,
         .cpu_model  = "POWER10",
         .xscom_base = 0x000603fc00000000ull,
-        .cfam_id    = 0x120da04900008000ull,
+        .cfam_id    = 0x220da04980000000ull,
         .first_core = 0x0,
         .num_i2c    = 4,
     },
diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target
index 8c3e4e4..509a20b 100644
--- a/tests/tcg/ppc64/Makefile.target
+++ b/tests/tcg/ppc64/Makefile.target
@@ -11,6 +11,18 @@
 
 -include config-cc.mak
 
+# Multi-threaded tests are known to fail when built with clang (e.g., in the clang-user CI job).
+# See: https://gitlab.com/qemu-project/qemu/-/issues/2456
+run-signals: signals
+	$(call skip-test, $<, "BROKEN (flaky with clang) ")
+run-plugin-signals-with-%:
+	$(call skip-test, $<, "BROKEN (flaky with clang) ")
+
+run-threadcount: threadcount
+	$(call skip-test, $<, "BROKEN (flaky with clang) ")
+run-plugin-threadcount-with-%:
+	$(call skip-test, $<, "BROKEN (flaky with clang) ")
+
 ifneq ($(CROSS_CC_HAS_POWER8_VECTOR),)
 PPC64_TESTS=bcdsub non_signalling_xscv
 endif