[kernel][test] Microbenchmarks for brwlock

Adds microbenchmarks for uncontended reader and writer acquisition of a BrwLock,
similar to the existing mutex microbenchmark.

ZX-3085 #comment

Test: k ut bench

Change-Id: Id6a78d217b3d437e8c2008ef9ba53434754485b4
diff --git a/kernel/tests/benchmarks.cpp b/kernel/tests/benchmarks.cpp
index 67a3062..8b86056 100644
--- a/kernel/tests/benchmarks.cpp
+++ b/kernel/tests/benchmarks.cpp
@@ -10,6 +10,7 @@
 #include <arch/ops.h>
 #include <err.h>
 #include <inttypes.h>
+#include <kernel/brwlock.h>
 #include <kernel/mp.h>
 #include <kernel/mutex.h>
 #include <kernel/spinlock.h>
@@ -270,6 +271,28 @@
     printf("%" PRIu64 " cycles to acquire/release uncontended mutex %u times (%" PRIu64 " cycles per)\n", c, count, c / count);
 }
 
+__NO_INLINE static void bench_rwlock() { // Times read, then write, acquire/release pairs on a local BrwLock.
+    BrwLock rw; // Local lock, used only by the two loops below.
+    static const uint count = 128 * 1024 * 1024; // Iterations per phase (read phase and write phase).
+    uint64_t c = arch_cycle_count(); // Cycle count at the start of the read phase.
+    for (size_t i = 0; i < count; i++) {
+        rw.ReadAcquire();
+        rw.ReadRelease();
+    }
+    c = arch_cycle_count() - c; // Elapsed cycles for the whole read loop (loop overhead included).
+
+    printf("%" PRIu64 " cycles to acquire/release uncontended brwlock for read %u times (%" PRIu64 " cycles per)\n", c, count, c / count);
+
+    c = arch_cycle_count(); // Reuse c: cycle count at the start of the write phase.
+    for (size_t i = 0; i < count; i++) {
+        rw.WriteAcquire();
+        rw.WriteRelease();
+    }
+    c = arch_cycle_count() - c; // Elapsed cycles for the whole write loop; c / count below truncates.
+
+    printf("%" PRIu64 " cycles to acquire/release uncontended brwlock for write %u times (%" PRIu64 " cycles per)\n", c, count, c / count);
+}
+
 int benchmarks(int, const cmd_args*, uint32_t) {
     bench_set_overhead();
     bench_memcpy();
@@ -286,6 +309,7 @@
 
     bench_spinlock();
     bench_mutex();
+    bench_rwlock();
 
     return 0;
 }