Merge branch 'for-upstream-0.15' of git://git.linaro.org/people/pmaydell/qemu-arm
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index adef427..1974d82 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -374,6 +374,7 @@
     ARM_FEATURE_V4T,
     ARM_FEATURE_V5,
     ARM_FEATURE_STRONGARM,
+    ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index ae4f334..5b0b492 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -70,13 +70,24 @@
         env->cp15.c0_cachetype = 0x1dd20d2;
         env->cp15.c1_sys = 0x00090078;
         break;
-    case ARM_CPUID_ARM1136_R2:
     case ARM_CPUID_ARM1136:
+        /* This is the 1136 r1, which is a v6K core */
+        set_feature(env, ARM_FEATURE_V6K);
+        /* Fall through */
+    case ARM_CPUID_ARM1136_R2:
+        /* What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an
+         * older core than plain "arm1136". In particular this does not
+         * have the v6K features.
+         */
         set_feature(env, ARM_FEATURE_V4T);
         set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_V6);
         set_feature(env, ARM_FEATURE_VFP);
         set_feature(env, ARM_FEATURE_AUXCR);
+        /* These ID register values are correct for 1136 but may be wrong
+         * for 1136_r2 (in particular r0p2 does not actually implement most
+         * of the ID registers).
+         */
         env->vfp.xregs[ARM_VFP_FPSID] = 0x410120b4;
         env->vfp.xregs[ARM_VFP_MVFR0] = 0x11111111;
         env->vfp.xregs[ARM_VFP_MVFR1] = 0x00000000;
@@ -92,6 +103,7 @@
         set_feature(env, ARM_FEATURE_V6K);
         set_feature(env, ARM_FEATURE_VFP);
         set_feature(env, ARM_FEATURE_AUXCR);
+        set_feature(env, ARM_FEATURE_VAPA);
         env->vfp.xregs[ARM_VFP_FPSID] = 0x410120b4;
         env->vfp.xregs[ARM_VFP_MVFR0] = 0x11111111;
         env->vfp.xregs[ARM_VFP_MVFR1] = 0x00000000;
@@ -222,6 +234,11 @@
         cpu_abort(env, "Bad CPU ID: %x\n", id);
         break;
     }
+
+    /* Some features automatically imply others: */
+    if (arm_feature(env, ARM_FEATURE_V7)) {
+        set_feature(env, ARM_FEATURE_VAPA);
+    }
 }
 
 void cpu_reset(CPUARMState *env)
@@ -1502,7 +1519,7 @@
             goto bad_reg;
         }
         /* No cache, so nothing to do except VA->PA translations. */
-        if (arm_feature(env, ARM_FEATURE_V6K)) {
+        if (arm_feature(env, ARM_FEATURE_VAPA)) {
             switch (crm) {
             case 4:
                 if (arm_feature(env, ARM_FEATURE_V7)) {
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 34d5e6e..fcb41d1 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2498,12 +2498,6 @@
         if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
             return 1;
     }
-    if (cpn == 7) {
-        /* ISB, DSB, DMB.  */
-        if ((cpm == 5 && op == 4)
-                || (cpm == 10 && (op == 4 || op == 5)))
-            return 1;
-    }
     return 0;
 }
 
@@ -2579,39 +2573,60 @@
         /* cdp */
         return 1;
     }
-    if (IS_USER(s) && !cp15_user_ok(env, insn)) {
-        return 1;
-    }
-
-    /* Pre-v7 versions of the architecture implemented WFI via coprocessor
-     * instructions rather than a separate instruction.
+    /* We special case a number of cp15 instructions which were used
+     * for things which are real instructions in ARMv7. This allows
+     * them to work in linux-user mode which doesn't provide functional
+     * get_cp15/set_cp15 helpers, and is more efficient anyway.
      */
-    if ((insn & 0x0fff0fff) == 0x0e070f90) {
+    switch ((insn & 0x0fff0fff)) {
+    case 0x0e070f90:
         /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores).
          * In v7, this must NOP.
          */
+        if (IS_USER(s)) {
+            return 1;
+        }
         if (!arm_feature(env, ARM_FEATURE_V7)) {
             /* Wait for interrupt.  */
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_WFI;
         }
         return 0;
-    }
-
-    if ((insn & 0x0fff0fff) == 0x0e070f58) {
+    case 0x0e070f58:
         /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI,
          * so this is slightly over-broad.
          */
-        if (!arm_feature(env, ARM_FEATURE_V6)) {
+        if (!IS_USER(s) && !arm_feature(env, ARM_FEATURE_V6)) {
             /* Wait for interrupt.  */
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_WFI;
             return 0;
         }
-        /* Otherwise fall through to handle via helper function.
+        /* Otherwise continue to handle via helper function.
          * In particular, on v7 and some v6 cores this is one of
          * the VA-PA registers.
          */
+        break;
+    case 0x0e070f3d:
+        /* 0,c7,c13,1: prefetch-by-MVA in v6, NOP in v7 */
+        if (arm_feature(env, ARM_FEATURE_V6)) {
+            return IS_USER(s) ? 1 : 0;
+        }
+        break;
+    case 0x0e070f95: /* 0,c7,c5,4 : ISB */
+    case 0x0e070f9a: /* 0,c7,c10,4: DSB */
+    case 0x0e070fba: /* 0,c7,c10,5: DMB */
+        /* Barriers in both v6 and v7 */
+        if (arm_feature(env, ARM_FEATURE_V6)) {
+            return 0;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (IS_USER(s) && !cp15_user_ok(env, insn)) {
+        return 1;
     }
 
     rd = (insn >> 12) & 0xf;
@@ -3056,6 +3071,17 @@
                     /* Source and destination the same.  */
                     gen_mov_F0_vreg(dp, rd);
                     break;
+                case 4:
+                case 5:
+                case 6:
+                case 7:
+                    /* VCVTB, VCVTT: only present with the halfprec extension,
+                     * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
+                     */
+                    if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
+                        return 1;
+                    }
+                    /* Otherwise fall through */
                 default:
                     /* One source operand.  */
                     gen_mov_F0_vreg(dp, rm);
@@ -3152,24 +3178,18 @@
                         gen_vfp_sqrt(dp);
                         break;
                     case 4: /* vcvtb.f32.f16 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = gen_vfp_mrs();
                         tcg_gen_ext16u_i32(tmp, tmp);
                         gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
                         tcg_temp_free_i32(tmp);
                         break;
                     case 5: /* vcvtt.f32.f16 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = gen_vfp_mrs();
                         tcg_gen_shri_i32(tmp, tmp, 16);
                         gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
                         tcg_temp_free_i32(tmp);
                         break;
                     case 6: /* vcvtb.f16.f32 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = tcg_temp_new_i32();
                         gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                         gen_mov_F0_vreg(0, rd);
@@ -3180,8 +3200,6 @@
                         gen_vfp_msr(tmp);
                         break;
                     case 7: /* vcvtt.f16.f32 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = tcg_temp_new_i32();
                         gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                         tcg_gen_shli_i32(tmp, tmp, 16);
@@ -3382,17 +3400,18 @@
                 VFP_DREG_D(rd, insn);
             else
                 rd = VFP_SREG_D(insn);
-            if (s->thumb && rn == 15) {
-                addr = tcg_temp_new_i32();
-                tcg_gen_movi_i32(addr, s->pc & ~2);
-            } else {
-                addr = load_reg(s, rn);
-            }
             if ((insn & 0x01200000) == 0x01000000) {
                 /* Single load/store */
                 offset = (insn & 0xff) << 2;
                 if ((insn & (1 << 23)) == 0)
                     offset = -offset;
+                if (s->thumb && rn == 15) {
+                    /* This is actually UNPREDICTABLE */
+                    addr = tcg_temp_new_i32();
+                    tcg_gen_movi_i32(addr, s->pc & ~2);
+                } else {
+                    addr = load_reg(s, rn);
+                }
                 tcg_gen_addi_i32(addr, addr, offset);
                 if (insn & (1 << 20)) {
                     gen_vfp_ld(s, dp, addr);
@@ -3404,11 +3423,34 @@
                 tcg_temp_free_i32(addr);
             } else {
                 /* load/store multiple */
+                int w = insn & (1 << 21);
                 if (dp)
                     n = (insn >> 1) & 0x7f;
                 else
                     n = insn & 0xff;
 
+                if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
+                    /* P == U , W == 1  => UNDEF */
+                    return 1;
+                }
+                if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
+                    /* UNPREDICTABLE cases for bad immediates: we choose to
+                     * UNDEF to avoid generating huge numbers of TCG ops
+                     */
+                    return 1;
+                }
+                if (rn == 15 && w) {
+                    /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
+                    return 1;
+                }
+
+                if (s->thumb && rn == 15) {
+                    /* This is actually UNPREDICTABLE */
+                    addr = tcg_temp_new_i32();
+                    tcg_gen_movi_i32(addr, s->pc & ~2);
+                } else {
+                    addr = load_reg(s, rn);
+                }
                 if (insn & (1 << 24)) /* pre-decrement */
                     tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
 
@@ -3428,7 +3470,7 @@
                     }
                     tcg_gen_addi_i32(addr, addr, offset);
                 }
-                if (insn & (1 << 21)) {
+                if (w) {
                     /* writeback */
                     if (insn & (1 << 24))
                         offset = -offset * n;