[kernel][vm] Work around protect bug

This is a temporary workaround for fxbug.dev/90014 that strips the write
permission from any arch aspace protect operation. Making existing
mappings directly writable could bypass copy-on-write on the backing VMO,
so write access is instead restored on demand via page faults. This is
only correct for user aspaces, which is enforced with an assert.

Bug: 90014
Change-Id: I118fd42f0693e62ca71e8389e5b085893f341136
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/615461
Reviewed-by: Nick Maniscalco <maniscalco@google.com>
Commit-Queue: Adrian Danis <adanis@google.com>
diff --git a/zircon/kernel/vm/vm_address_region.cc b/zircon/kernel/vm/vm_address_region.cc
index 9146fe778..69416cf0 100644
--- a/zircon/kernel/vm/vm_address_region.cc
+++ b/zircon/kernel/vm/vm_address_region.cc
@@ -1050,13 +1050,16 @@
   vmo->set_name(name, strlen(name));
   // allocate a region and put it in the aspace list
   fbl::RefPtr<VmMapping> r(nullptr);
-  // Here we use permissive arch_mmu_flags so that the following Protect call would actually
-  // call arch_aspace().Protect to change the mmu_flags in PTE.
-  status = CreateVmMapping(
-      offset, size, 0, VMAR_FLAG_SPECIFIC, vmo, 0,
-      ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE | ARCH_MMU_FLAG_PERM_EXECUTE, name, &r);
+  status = CreateVmMapping(offset, size, 0, VMAR_FLAG_SPECIFIC, vmo, 0, arch_mmu_flags, name, &r);
   if (status != ZX_OK) {
     return status;
   }
-  return r->Protect(base, size, arch_mmu_flags);
+  // Directly invoke a protect on the hardware aspace to modify the protection of any existing
+  // mappings. If the desired protection flags are "no permissions" then we need to unmap
+  // instead of protect, since a mapping with no permissions is not valid on most architectures.
+  if ((arch_mmu_flags & ARCH_MMU_FLAG_PERM_RWX_MASK) == 0) {
+    return aspace_->arch_aspace().Unmap(base, size / PAGE_SIZE, nullptr);
+  } else {
+    return aspace_->arch_aspace().Protect(base, size / PAGE_SIZE, arch_mmu_flags);
+  }
 }
diff --git a/zircon/kernel/vm/vm_mapping.cc b/zircon/kernel/vm/vm_mapping.cc
index aaffded..68ab91f 100644
--- a/zircon/kernel/vm/vm_mapping.cc
+++ b/zircon/kernel/vm/vm_mapping.cc
@@ -34,6 +34,7 @@
 KCOUNTER(vm_mapping_attribution_cache_hits, "vm.attributed_pages.mapping.cache_hits")
 KCOUNTER(vm_mapping_attribution_cache_misses, "vm.attributed_pages.mapping.cache_misses")
 KCOUNTER(vm_mappings_merged, "vm.aspace.mapping.merged_neighbors")
+KCOUNTER(vm_mappings_protect_no_write, "vm.aspace.mapping.protect_without_write")
 
 }  // namespace
 
@@ -146,6 +147,23 @@
 // Implementation helper for ProtectLocked
 zx_status_t ProtectOrUnmap(const fbl::RefPtr<VmAspace>& aspace, vaddr_t base, size_t size,
                            uint new_arch_mmu_flags) {
+  // If the desired arch mmu flags contain the WRITE permission then we cannot go and make any
+  // existing mappings writable. This is because the backing VMO might want to do
+  // copy-on-write, and making the mappings writable would bypass that and potentially allow
+  // writing to what was supposed to be a read-only page provided by the VMO. We cannot simply
+  // do nothing, though, since we might be removing permissions. For now we set the permissions
+  // to what was requested, just without write. This is not optimal, since there could be
+  // legitimate existing writable mappings that we will now needlessly remove write from, but it
+  // is at least correct. Unfortunately it is not correct for a kernel aspace, since we cannot
+  // use faults to bring back the correct mappings. For now we require, via an assert, that this
+  // is not a kernel aspace.
+  // TODO(fxbug.dev/90014): Make this more optimal and find a solution for the kernel path.
+  if (new_arch_mmu_flags & ARCH_MMU_FLAG_PERM_WRITE) {
+    ASSERT(aspace->is_user());
+    vm_mappings_protect_no_write.Add(1);
+    new_arch_mmu_flags &= ~ARCH_MMU_FLAG_PERM_WRITE;
+  }
+
   if (new_arch_mmu_flags & ARCH_MMU_FLAG_PERM_RWX_MASK) {
     return aspace->arch_aspace().Protect(base, size / PAGE_SIZE, new_arch_mmu_flags);
   } else {
diff --git a/zircon/system/utest/core/vmar/vmar.cc b/zircon/system/utest/core/vmar/vmar.cc
index aeeaa05..0f17d20 100644
--- a/zircon/system/utest/core/vmar/vmar.cc
+++ b/zircon/system/utest/core/vmar/vmar.cc
@@ -2249,4 +2249,43 @@
             ZX_OK);
 }
 
+TEST(Vmar, ProtectCowWritable) {
+  zx::vmo vmo;
+  ASSERT_OK(zx::vmo::create(zx_system_get_page_size() * 2, 0, &vmo));
+
+  uint64_t val = 42;
+  EXPECT_OK(vmo.write(&val, 0, sizeof(uint64_t)));
+
+  zx::vmo clone;
+  ASSERT_OK(vmo.create_child(ZX_VMO_CHILD_SNAPSHOT, 0, zx_system_get_page_size() * 2, &clone));
+
+  // Map the clone in read/write.
+  zx_vaddr_t addr;
+  ASSERT_OK(zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, clone, 0,
+                                       zx_system_get_page_size() * 2, &addr));
+
+  // Protect it read-only.
+  EXPECT_OK(zx::vmar::root_self()->protect(ZX_VM_PERM_READ, addr, zx_system_get_page_size() * 2));
+
+  // Perform some reads to ensure there are mappings.
+  uint64_t val2 = *reinterpret_cast<uint64_t*>(addr);
+  EXPECT_OK(clone.read(&val, 0, sizeof(uint64_t)));
+  EXPECT_EQ(val2, val);
+
+  // Now protect the first page back to read/write.
+  EXPECT_OK(zx::vmar::root_self()->protect(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, addr,
+                                           zx_system_get_page_size()));
+
+  // Write to the page; this faults in a writable mapping and triggers the copy-on-write.
+  *reinterpret_cast<volatile uint64_t*>(addr) = 77;
+
+  // Original vmo should be unchanged.
+  EXPECT_OK(vmo.read(&val, 0, sizeof(uint64_t)));
+  EXPECT_EQ(42, val);
+
+  // Clone should have been modified.
+  EXPECT_OK(clone.read(&val, 0, sizeof(uint64_t)));
+  EXPECT_EQ(77, val);
+}
+
 }  // namespace