[ram] Full AML Ram driver

This driver implements
fuchsia.hardware.ram.metrics:fuchsia.hardware.ram.metrics

Driver is able to report bandwidth bus metrics
for multiple clients. The basic CLI client
In review https://fuchsia-review.googlesource.com/c/fuchsia/+/384801

Test: New test added
fx test fuchsia-pkg://fuchsia.com/aml-ram-test#meta/aml-ram-test.cmx

Bug: 48254

Change-Id: Iad14268cdb3b3134281409010d89a6269fb9f35e
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/383717
Commit-Queue: Carlos Pizano <cpu@google.com>
Testability-Review: Suraj Malhotra <surajmalhotra@google.com>
Testability-Review: Andres Oportus <andresoportus@google.com>
Reviewed-by: Andres Oportus <andresoportus@google.com>
Reviewed-by: Suraj Malhotra <surajmalhotra@google.com>
diff --git a/src/devices/BUILD.gn b/src/devices/BUILD.gn
index 20ed830..3611ef6 100644
--- a/src/devices/BUILD.gn
+++ b/src/devices/BUILD.gn
@@ -29,6 +29,7 @@
     "power:tests",
     "pwm:tests",
     "pwm:tests",
+    "ram:tests",
     "serial:tests",
     "shareddma:tests",
     "tests",
diff --git a/src/devices/ram/BUILD.gn b/src/devices/ram/BUILD.gn
new file mode 100644
index 0000000..4f9ba9d9
--- /dev/null
+++ b/src/devices/ram/BUILD.gn
@@ -0,0 +1,9 @@
+# Copyright 2020 The Fuchsia Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+group("tests") {
+  testonly = true
+
+  deps = [ "drivers:tests" ]
+}
diff --git a/src/devices/ram/drivers/BUILD.gn b/src/devices/ram/drivers/BUILD.gn
new file mode 100644
index 0000000..098f975
--- /dev/null
+++ b/src/devices/ram/drivers/BUILD.gn
@@ -0,0 +1,10 @@
+# Copyright 2020 The Fuchsia Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/unification/images/migrated_manifest.gni")
+
+group("tests") {
+  testonly = true
+  deps = [ "aml-ram:tests" ]
+}
diff --git a/src/devices/ram/drivers/aml-ram/BUILD.gn b/src/devices/ram/drivers/aml-ram/BUILD.gn
index 3cb8539..dbd751c 100644
--- a/src/devices/ram/drivers/aml-ram/BUILD.gn
+++ b/src/devices/ram/drivers/aml-ram/BUILD.gn
@@ -3,6 +3,8 @@
 # found in the LICENSE file.
 
 import("//build/config/fuchsia/rules.gni")
+import("//build/test.gni")
+import("//build/test/test_package.gni")
 import("//build/unification/images/migrated_manifest.gni")
 
 driver_module("aml-ram") {
@@ -15,12 +17,40 @@
   }
   sources = [ "aml-ram.cc" ]
   deps = [
-    "//src/devices/bus/lib/device-protocol-pdev",
-    "//src/devices/lib/amlogic",
+    ":common",
     "//src/devices/lib/driver",
     "//zircon/public/lib/ddk",
+    "//zircon/public/lib/mmio",
+  ]
+}
+
+test("aml-ram-test") {
+  configs += [ "//build/unification/config:zircon-migrated" ]
+  output_name = "aml-ram-test"
+  sources = [
+    "aml-ram-test.cc",
+    "aml-ram.cc",
+  ]
+  deps = [
+    ":common",
+    "//src/devices/testing/fake-bti",
+    "//src/devices/testing/fake-mmio-reg",
+    "//src/devices/testing/fake_ddk",
+    "//zircon/public/lib/mock-function",
+    "//zircon/public/lib/zxtest",
+  ]
+}
+
+group("common") {
+  public_deps = [
+    "//sdk/fidl/fuchsia.hardware.ram.metrics:fuchsia.hardware.ram.metrics_llcpp",
+    "//src/devices/bus/lib/device-protocol-pdev",
+    "//src/devices/bus/lib/device-protocol-platform-device",
+    "//src/devices/lib/amlogic",
+    "//zircon/public/lib/ddk",
     "//zircon/public/lib/ddktl",
     "//zircon/public/lib/fbl",
+    "//zircon/public/lib/hwreg",
     "//zircon/public/lib/mmio",
     "//zircon/public/lib/sync",
     "//zircon/public/lib/zircon-internal",
@@ -32,3 +62,20 @@
 migrated_manifest("aml-ram-manifest") {
   deps = [ ":aml-ram" ]
 }
+
+unittest_package("aml-ram-test-package") {
+  package_name = "aml-ram-test"
+  deps = [ ":aml-ram-test" ]
+
+  tests = [
+    {
+      name = "aml-ram-test"
+      environments = basic_envs
+    },
+  ]
+}
+
+group("tests") {
+  testonly = true
+  deps = [ ":aml-ram-test-package" ]
+}
diff --git a/src/devices/ram/drivers/aml-ram/aml-ram-test.cc b/src/devices/ram/drivers/aml-ram/aml-ram-test.cc
new file mode 100644
index 0000000..02a17b2
--- /dev/null
+++ b/src/devices/ram/drivers/aml-ram/aml-ram-test.cc
@@ -0,0 +1,219 @@
+// Copyright 2020 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "aml-ram.h"
+
+#include <lib/device-protocol/pdev.h>
+#include <lib/fake-bti/bti.h>
+#include <lib/fake_ddk/fake_ddk.h>
+#include <lib/fake_ddk/fidl-helper.h>
+
+#include <atomic>
+#include <memory>
+
+#include <ddktl/device.h>
+#include <ddktl/protocol/platform/bus.h>
+#include <ddktl/protocol/platform/device.h>
+#include <fake-mmio-reg/fake-mmio-reg.h>
+
+namespace amlogic_ram {
+
+constexpr size_t kRegSize = 0x01000 / sizeof(uint32_t);
+
+class FakePDev : public ddk::PDevProtocol<FakePDev, ddk::base_protocol> {
+ public:
+  FakePDev() : proto_({&pdev_protocol_ops_, this}) {
+    regs_ = std::make_unique<ddk_fake::FakeMmioReg[]>(kRegSize);
+    mmio_ = std::make_unique<ddk_fake::FakeMmioRegRegion>(regs_.get(), sizeof(uint32_t), kRegSize);
+  }
+
+  zx_status_t PDevGetMmio(uint32_t index, pdev_mmio_t* out_mmio) {
+    EXPECT_EQ(index, 0);
+    out_mmio->offset = reinterpret_cast<size_t>(this);
+    return ZX_OK;
+  }
+
+  zx_status_t PDevGetInterrupt(uint32_t index, uint32_t flags, zx::interrupt* out_irq) {
+    return ZX_ERR_NOT_SUPPORTED;
+  }
+
+  zx_status_t PDevGetBti(uint32_t index, zx::bti* out_bti) { return ZX_ERR_NOT_SUPPORTED; }
+
+  zx_status_t PDevGetSmc(uint32_t index, zx::resource* out_resource) {
+    return ZX_ERR_NOT_SUPPORTED;
+  }
+
+  zx_status_t PDevGetDeviceInfo(pdev_device_info_t* out_info) { return ZX_ERR_NOT_SUPPORTED; }
+
+  zx_status_t PDevGetBoardInfo(pdev_board_info_t* out_info) { return ZX_ERR_NOT_SUPPORTED; }
+
+  const pdev_protocol_t* proto() const { return &proto_; }
+  ddk::MmioBuffer mmio() { return ddk::MmioBuffer(mmio_->GetMmioBuffer()); }
+
+  ddk_fake::FakeMmioReg& reg(size_t ix) {
+    // AML registers are in virtual address units.
+    return regs_[ix >> 2];
+  }
+
+ private:
+  pdev_protocol_t proto_;
+  std::unique_ptr<ddk_fake::FakeMmioReg[]> regs_;
+  std::unique_ptr<ddk_fake::FakeMmioRegRegion> mmio_;
+};
+
+class Ddk : public fake_ddk::Bind {
+ public:
+  Ddk() {}
+  bool added() { return add_called_; }
+  const device_add_args_t& args() { return add_args_; }
+
+ private:
+  zx_status_t DeviceAdd(zx_driver_t* drv, zx_device_t* parent, device_add_args_t* args,
+                        zx_device_t** out) override {
+    zx_status_t status = fake_ddk::Bind::DeviceAdd(drv, parent, args, out);
+    if (status != ZX_OK) {
+      return status;
+    }
+    add_args_ = *args;
+    return ZX_OK;
+  }
+  device_add_args_t add_args_;
+};
+
+class AmlRamDeviceTest : public zxtest::Test {
+ public:
+  void SetUp() override {
+    fbl::Array<fake_ddk::ProtocolEntry> protocols(new fake_ddk::ProtocolEntry[1], 1);
+    protocols[0] = {ZX_PROTOCOL_PDEV, {pdev_.proto()->ops, pdev_.proto()->ctx}};
+    ddk_.SetProtocols(std::move(protocols));
+
+    EXPECT_OK(amlogic_ram::AmlRam::Create(nullptr, fake_ddk::FakeParent()));
+  }
+
+  void TearDown() override {
+    auto device = static_cast<amlogic_ram::AmlRam*>(ddk_.args().ctx);
+    device->DdkAsyncRemove();
+    EXPECT_TRUE(ddk_.Ok());
+  }
+
+ protected:
+  FakePDev pdev_;
+  Ddk ddk_;
+};
+
+void WriteDisallowed(uint64_t value) { EXPECT_TRUE(false, "got register write of 0x%lx", value); }
+
+TEST_F(AmlRamDeviceTest, InitDoesNothing) {
+  // By itself the driver does not write to registers.
+  // The fixture's TearDown() test also the lifecycle bits.
+  pdev_.reg(MEMBW_PORTS_CTRL).SetWriteCallback(&WriteDisallowed);
+  pdev_.reg(MEMBW_TIMER).SetWriteCallback(&WriteDisallowed);
+}
+
+TEST_F(AmlRamDeviceTest, MalformedRequests) {
+  // An invalid request does not write to registers.
+  pdev_.reg(MEMBW_PORTS_CTRL).SetWriteCallback(&WriteDisallowed);
+  pdev_.reg(MEMBW_TIMER).SetWriteCallback(&WriteDisallowed);
+
+  ram_metrics::BandwidthMeasurementConfig config;
+  ram_metrics::Device::SyncClient client{std::move(ddk_.FidlClient())};
+
+  // Invalid cycles (too low).
+  config = {(200), {1, 0, 0, 0, 0, 0}};
+  auto info = client.MeasureBandwidth(config);
+  ASSERT_TRUE(info.ok());
+  ASSERT_TRUE(info->result.is_err());
+  EXPECT_EQ(info->result.err(), ZX_ERR_INVALID_ARGS);
+
+  // Invalid channel (above channel 3).
+  config = {(1024 * 1024 * 10), {0, 0, 0, 0, 1}};
+  info = client.MeasureBandwidth(config);
+  ASSERT_TRUE(info.ok());
+  ASSERT_TRUE(info->result.is_err());
+  EXPECT_EQ(info->result.err(), ZX_ERR_INVALID_ARGS);
+}
+
+#if 0
+// This test is disabled until we have fxb/51461 fixed.
+
+TEST_F(AmlRamDeviceTest, ValidRequest) {
+  constexpr uint32_t kCyclesToMeasure = (1024 * 1024 * 10u);
+  constexpr uint32_t kControlStart = DMC_QOS_ENABLE_CTRL | 0b0111;
+  constexpr uint32_t kControlStop = DMC_QOS_CLEAR_CTRL | 0b0111;
+  constexpr uint32_t kReadCycles[] = {0x125001, 0x124002, 0x123003, 0x0};
+
+  ram_metrics::BandwidthMeasurementConfig config = {kCyclesToMeasure, {4, 2, 1, 0, 0, 0}};
+
+  // |step| helps track of the expected sequence of reads and writes.
+  std::atomic<int> step = 0;
+
+  pdev_.reg(MEMBW_TIMER).SetWriteCallback([expect = kCyclesToMeasure, &step](size_t value) {
+    EXPECT_EQ(step, 0, "unexpected: 0x%lx", value);
+    ++step;
+    EXPECT_EQ(value, expect, "got write of 0x%lx", value);
+  });
+
+  pdev_.reg(MEMBW_PORTS_CTRL)
+      .SetWriteCallback([start = kControlStart, stop = kControlStop, &step](size_t value) {
+        if (step == 1) {
+          EXPECT_EQ(value, start, "0: got write of 0x%lx", value);
+          ++step;
+        } else if (step == 3) {
+          EXPECT_EQ(value, stop, "2: got write of 0x%lx", value);
+          ++step;
+        } else {
+          EXPECT_TRUE(false, "unexpected: 0x%lx", value);
+        }
+      });
+
+  pdev_.reg(MEMBW_C0_GRANT_CNT).SetReadCallback([&step, value = kReadCycles[0]]() {
+    EXPECT_EQ(step, 2);
+    // Value of channel 0 cycles granted.
+    return value;
+  });
+
+  pdev_.reg(MEMBW_C1_GRANT_CNT).SetReadCallback([&step, value = kReadCycles[1]]() {
+    EXPECT_EQ(step, 2);
+    // Value of channel 1 cycles granted.
+    return value;
+  });
+
+  pdev_.reg(MEMBW_C2_GRANT_CNT).SetReadCallback([&step, value = kReadCycles[2]]() {
+    EXPECT_EQ(step, 2);
+    // Value of channel 2 cycles granted.
+    return value;
+  });
+
+  pdev_.reg(MEMBW_C2_GRANT_CNT).SetReadCallback([&step, value = kReadCycles[3]]() {
+    EXPECT_EQ(step, 2);
+    ++step;
+    // Value of channel 3 cycles granted.
+    return value;
+  });
+
+  ram_metrics::Device::SyncClient client{std::move(ddk_.FidlClient())};
+  auto info = client.MeasureBandwidth(config);
+  ASSERT_TRUE(info.ok());
+  ASSERT_FALSE(info->result.is_err());
+
+  // All reads and writes happened.
+  EXPECT_EQ(step, 4);
+}
+#endif
+
+}  // namespace amlogic_ram
+
+// We replace this method to allow the FakePDev::PDevGetMmio() to work
+// with the driver unmodified. The real implementation tries to map a VMO that
+// we can't properly fake at the moment.
+zx_status_t ddk::PDev::MapMmio(uint32_t index, std::optional<ddk::MmioBuffer>* mmio) {
+  pdev_mmio_t pdev_mmio;
+  zx_status_t status = GetMmio(index, &pdev_mmio);
+  if (status != ZX_OK) {
+    return status;
+  }
+  auto* src = reinterpret_cast<amlogic_ram::FakePDev*>(pdev_mmio.offset);
+  mmio->emplace(src->mmio());
+  return ZX_OK;
+}
diff --git a/src/devices/ram/drivers/aml-ram/aml-ram.cc b/src/devices/ram/drivers/aml-ram/aml-ram.cc
index 30e74df..3e84a69 100644
--- a/src/devices/ram/drivers/aml-ram/aml-ram.cc
+++ b/src/devices/ram/drivers/aml-ram/aml-ram.cc
@@ -5,6 +5,7 @@
 #include "aml-ram.h"
 
 #include <lib/device-protocol/pdev.h>
+#include <lib/zx/clock.h>
 #include <zircon/assert.h>
 
 #include <ddk/binding.h>
@@ -13,131 +14,37 @@
 #include <ddk/platform-defs.h>
 #include <ddktl/fidl.h>
 #include <fbl/alloc_checker.h>
+#include <fbl/auto_lock.h>
 #include <soc/aml-s905d2/s905d2-hw.h>
 
 namespace amlogic_ram {
-// There are 4 monitoring channels and each one can agregate up to 64
-// hardware memory ports. NOTE: the word channel and port in this file
-// refer to hardware, not to zircon objects.
-constexpr size_t MEMBW_MAX_CHANNELS = 4u;
 
-// Controls start,stop and if polling or interrupt mode.
-constexpr uint32_t MEMBW_PORTS_CTRL = (0x0020 << 2);
-constexpr uint32_t DMC_QOS_ENABLE_CTRL = (0x01 << 31);
-constexpr uint32_t DMC_QOS_CLEAR_CTRL = (0x01 << 30);
+constexpr zx_signals_t kCancelSignal = ZX_USER_SIGNAL_0;
+constexpr zx_signals_t kWorkPendingSignal = ZX_USER_SIGNAL_1;
 
-// Returns the granted cycles counters total.
-constexpr uint32_t MEMBW_ALL_GRANT_CNT = (0x002a << 2);
-// Returns the granted cycles per channel.
-constexpr uint32_t MEMBW_C0_GRANT_CNT = (0x2b << 2);
-constexpr uint32_t MEMBW_C1_GRANT_CNT = (0x2c << 2);
-constexpr uint32_t MEMBW_C2_GRANT_CNT = (0x2d << 2);
-constexpr uint32_t MEMBW_C3_GRANT_CNT = (0x2e << 2);
+constexpr size_t kMaxPendingRequests = 64u;
 
-// Controls how long to measure cycles for.
-constexpr uint32_t MEMBW_TIMER = (0x002f << 2);
+zx_status_t ValidateRequest(const ram_metrics::BandwidthMeasurementConfig& config) {
+  // Restrict timer to reasonable values.
+  if ((config.cycles_to_measure < kMinimumCycleCount) ||
+      (config.cycles_to_measure > kMaximumCycleCount)) {
+    return ZX_ERR_INVALID_ARGS;
+  }
 
-// Controls which ports are assigned to each channel.
-constexpr uint32_t MEMBW_RP[MEMBW_MAX_CHANNELS] = {(0x0021 << 2), (0x0023 << 2), (0x0025 << 2),
-                                                   (0x0027 << 2)};
+  for (size_t ix = 0; ix != ram_metrics::MAX_COUNT_CHANNELS; ++ix) {
+    auto& channel = config.channels[ix];
 
-// Controls wich subports are assinged to each channel.
-constexpr uint32_t MEMBW_SP[MEMBW_MAX_CHANNELS] = {(0x0022 << 2), (0x0024 << 2), (0x0026 << 2),
-                                                   (0x0028 << 2)};
+    if ((ix >= MEMBW_MAX_CHANNELS) && (channel != 0)) {
+      // We only support the first four channels.
+      return ZX_ERR_INVALID_ARGS;
+    }
 
-constexpr double kMemCyclePerSecond = (912.0 / 2.0);
-constexpr uint32_t kMemCycleCount = 1024 * 1024 * 57u;
-
-// Ports constants for Astro and Sherlock.
-// TODO(cpu) move this constants out of the driver and into the client.
-#define PortID_ARM_AE (0x01u << 0)
-#define PortID_MALI (0x01u << 1)
-#define PortID_PCIE (0x01u << 2)
-#define PortID_HDCP (0x01u << 3)
-#define PortID_HEVC_FRONT (0x01u << 4)
-#define PortID_TEST (0x01u << 5)
-#define PortID_USB30 (0x01u << 6)
-#define PortID_HEVC_BACK (0x01u << 8)
-#define PortID_H265_ENC (0x01u << 9)
-#define PortID_VPU_R1 (0x01u << 16)
-#define PortID_VPU_R2 (0x01u << 17)
-#define PortID_VPU_R3 (0x01u << 18)
-#define PortID_VPU_W1 (0x01u << 19)
-#define PortID_VPU_W2 (0x01u << 20)
-#define PortID_VDEC (0x01u << 21)
-#define PortID_HCODEC (0x01u << 22)
-#define PortID_GE2D (0x01u << 23)
-// Sherlock-only ports.
-#define PortID_NNA (0x01u << 10)
-#define PortID_GDC (0x01u << 11)
-#define PortID_MIPI_ISP (0x01u << 12)
-#define PortID_ARM_AF (0x01u << 13)
-
-struct BwPorts {
-  // Each port is a single bit per channel.
-  uint64_t chn[MEMBW_MAX_CHANNELS];
-};
-
-constexpr double CalcBandwidth(uint32_t counter) {
-  return (static_cast<double>(counter) * 16.0 * kMemCyclePerSecond) / kMemCycleCount;
-}
-
-void InitBandwithTimer(ddk::MmioBuffer& mmio) {
-  // The only thing to init is how many cycles to sample for
-  // bandwith calculations.
-  mmio.Write32(kMemCycleCount, MEMBW_TIMER);
-}
-
-// Read the counters and output the bandwith computation via zxlog.
-// This only happens if the driver has driver.aml_ram.log=+trace.
-zx_status_t ReadBandwithCounters(ddk::MmioBuffer& mmio, const BwPorts& ports, zx::event& event) {
-  uint32_t channels_enabled = 0u;
-  for (size_t ix = 0; ix != MEMBW_MAX_CHANNELS; ++ix) {
-    if (ports.chn[ix] > 0xffffffff) {
+    if (channel > 0xffffffff) {
       // We don't support sub-ports (bits above 31) yet.
       return ZX_ERR_NOT_SUPPORTED;
     }
-    channels_enabled |= (ports.chn[ix] != 0) ? (1u << ix) : 0;
-    mmio.Write32(static_cast<uint32_t>(ports.chn[ix]), MEMBW_RP[ix]);
-    mmio.Write32(0xffff, MEMBW_SP[ix]);
   }
 
-  if (channels_enabled == 0x0) {
-    // Nothing to monitor.
-    return ZX_OK;
-  }
-
-  mmio.Write32(channels_enabled | DMC_QOS_ENABLE_CTRL, MEMBW_PORTS_CTRL);
-
-  uint32_t value = mmio.Read32(MEMBW_PORTS_CTRL);
-  uint32_t count = 0;
-
-  // Polling loop for the bandwith cycle counters.
-  // TODO(cpu): tune wait interval to minimize polling.
-  while (value & DMC_QOS_ENABLE_CTRL) {
-    if (count++ > 20) {
-      return ZX_ERR_TIMED_OUT;
-    }
-
-    auto status = event.wait_one(ZX_EVENT_SIGNALED, zx::deadline_after(zx::msec(50)), nullptr);
-    if (status != ZX_ERR_TIMED_OUT) {
-      // Likely shutdown. The caller handles this.
-      return ZX_OK;
-    }
-    value = mmio.Read32(MEMBW_PORTS_CTRL);
-  }
-
-  auto counter_all = mmio.Read32(MEMBW_ALL_GRANT_CNT);
-  auto counter_0 = mmio.Read32(MEMBW_C0_GRANT_CNT);
-  auto counter_1 = mmio.Read32(MEMBW_C1_GRANT_CNT);
-  auto counter_2 = mmio.Read32(MEMBW_C2_GRANT_CNT);
-  auto counter_3 = mmio.Read32(MEMBW_C3_GRANT_CNT);
-
-  zxlogf(TRACE, "aml-ram: bw:%g %g %g %g %g cz:%d", CalcBandwidth(counter_all),
-         CalcBandwidth(counter_0), CalcBandwidth(counter_1), CalcBandwidth(counter_2),
-         CalcBandwidth(counter_3), count);
-
-  mmio.Write32(channels_enabled | DMC_QOS_CLEAR_CTRL, MEMBW_PORTS_CTRL);
   return ZX_OK;
 }
 
@@ -168,16 +75,20 @@
 }
 
 AmlRam::AmlRam(zx_device_t* parent, ddk::MmioBuffer mmio)
-    : DeviceType(parent), mmio_(std::move(mmio)) {
-  if (zxlog_level_enabled(TRACE)) {
-    auto status = zx::event::create(0u, &shutdown_);
-    ZX_ASSERT(status == ZX_OK);
-    thread_ = std::thread([this] { ReadLoop(); });
-  }
+    : DeviceType(parent), mmio_(std::move(mmio)) {}
+
+AmlRam::~AmlRam() {
+  // Verify we drained all requests.
+  ZX_ASSERT(requests_.empty());
 }
 
 void AmlRam::DdkSuspend(ddk::SuspendTxn txn) {
-  Shutdown();
+  // TODO(cpu): First put the device into txn.requested_state().
+  if (txn.suspend_reason() & (DEVICE_SUSPEND_REASON_POWEROFF | DEVICE_SUSPEND_REASON_MEXEC |
+                              DEVICE_SUSPEND_REASON_REBOOT)) {
+    // Do any additional cleanup that is needed while shutting down the driver.
+    Shutdown();
+  }
   txn.Reply(ZX_OK, txn.requested_state());
 }
 
@@ -188,49 +99,171 @@
 
 zx_status_t AmlRam::DdkMessage(fidl_msg_t* msg, fidl_txn_t* txn) {
   DdkTransaction transaction(txn);
-  // TODO(cpu): Implement dispatch of the fuchsia.hardware.ram.metrics
-  // FIDL interface. This controls which ports to sample rather than
-  // hardcoding them in ReadLoop().
+  ram_metrics::Device::Dispatch(this, msg, &transaction);
   return transaction.Status();
 }
 
-// TODO(cpu): Remove this function once the FIDL protocol is implemented.
-void AmlRam::ReadLoop() {
-  constexpr uint32_t kDecoder = PortID_HEVC_FRONT | PortID_HEVC_BACK | PortID_VDEC | PortID_HCODEC;
-  constexpr uint32_t kVPU =
-      PortID_VPU_R1 | PortID_VPU_R2 | PortID_VPU_R3 | PortID_VPU_W1 | PortID_VPU_W2;
+void AmlRam::MeasureBandwidth(ram_metrics::BandwidthMeasurementConfig config,
+                              MeasureBandwidthCompleter::Sync completer) {
+  zx_status_t st = ValidateRequest(config);
+  if (st != ZX_OK) {
+    zxlogf(ERROR, "aml-ram: bad request\n");
+    completer.ReplyError(st);
+    return;
+  }
 
-  // Sample the following 4 channels for the time being.
-  const BwPorts kPorts = {{PortID_ARM_AE, PortID_MALI, kDecoder, kVPU}};
-  InitBandwithTimer(mmio_);
-
-  for (;;) {
-    auto status = ReadBandwithCounters(mmio_, kPorts, shutdown_);
+  if (!thread_control_) {
+    // First request.
+    auto status = zx::event::create(0u, &thread_control_);
     if (status != ZX_OK) {
-      zxlogf(ERROR, "aml-ram: error reading counters, st =%d", status);
+      completer.ReplyError(st);
+      zxlogf(ERROR, "aml-ram: could not create event\n");
+      return;
+    }
+    thread_ = std::thread([this] { ReadLoop(); });
+  }
+
+  {
+    fbl::AutoLock lock(&lock_);
+
+    if (requests_.size() > kMaxPendingRequests) {
+      // Once the queue is shorter the request would likely succeed.
+      completer.ReplyError(ZX_ERR_SHOULD_WAIT);
       return;
     }
 
+    // Enqueue task and signal worker thread as needed.
+    requests_.emplace_back(std::move(config), completer.ToAsync());
+    if (requests_.size() == 1u) {
+      thread_control_.signal(0, kWorkPendingSignal);
+    }
+  }
+}
+
+zx_status_t AmlRam::ReadBandwithCounters(const ram_metrics::BandwidthMeasurementConfig& config,
+                                         ram_metrics::BandwidthInfo* bpi) {
+  uint32_t channels_enabled = 0u;
+  for (size_t ix = 0; ix != MEMBW_MAX_CHANNELS; ++ix) {
+    channels_enabled |= (config.channels[ix] != 0) ? (1u << ix) : 0;
+    mmio_.Write32(static_cast<uint32_t>(config.channels[ix]), MEMBW_RP[ix]);
+    mmio_.Write32(0xffff, MEMBW_SP[ix]);
+  }
+
+  if (channels_enabled == 0x0) {
+    // Nothing to monitor.
+    return ZX_OK;
+  }
+
+  mmio_.Write32(static_cast<uint32_t>(config.cycles_to_measure), MEMBW_TIMER);
+  mmio_.Write32(channels_enabled | DMC_QOS_ENABLE_CTRL, MEMBW_PORTS_CTRL);
+
+  bpi->timestamp = zx_clock_get_monotonic();
+
+  uint32_t value = mmio_.Read32(MEMBW_PORTS_CTRL);
+  uint32_t count = 0;
+
+  // Polling loop for the bandwith cycle counters.
+  // TODO(cpu): tune wait interval to minimize polling.
+  while (value & DMC_QOS_ENABLE_CTRL) {
+    if (count++ > 20) {
+      return ZX_ERR_TIMED_OUT;
+    }
+
+    auto status =
+        thread_control_.wait_one(kCancelSignal, zx::deadline_after(zx::msec(50)), nullptr);
+    if (status != ZX_ERR_TIMED_OUT) {
+      // Shutdown. This is handled by the caller.
+      return ZX_ERR_CANCELED;
+    }
+    value = mmio_.Read32(MEMBW_PORTS_CTRL);
+  }
+
+  bpi->channels[0].readwrite_cycles = mmio_.Read32(MEMBW_C0_GRANT_CNT) * 16ul;
+  bpi->channels[1].readwrite_cycles = mmio_.Read32(MEMBW_C1_GRANT_CNT) * 16ul;
+  bpi->channels[2].readwrite_cycles = mmio_.Read32(MEMBW_C2_GRANT_CNT) * 16ul;
+  bpi->channels[3].readwrite_cycles = mmio_.Read32(MEMBW_C3_GRANT_CNT) * 16ul;
+
+  mmio_.Write32(channels_enabled | DMC_QOS_CLEAR_CTRL, MEMBW_PORTS_CTRL);
+  return ZX_OK;
+}
+
+void AmlRam::ReadLoop() {
+  std::deque<Job> jobs;
+
+  for (;;) {
     zx_signals_t observed = 0u;
-    status = shutdown_.wait_one(ZX_EVENT_SIGNALED, zx::deadline_after(zx::sec(3)), &observed);
-    if (status == ZX_ERR_TIMED_OUT) {
-      continue;
-    } else if (status != ZX_OK) {
-      zxlogf(ERROR, "aml-ram: error in wait_one, st =%d", status);
+    auto status = thread_control_.wait_one(kCancelSignal | kWorkPendingSignal, zx::time::infinite(),
+                                           &observed);
+    if (status != ZX_OK) {
+      zxlogf(ERROR, "aml-ram: error in wait_one, st =%d\n", status);
       return;
-    } else {
-      // Normal shutdown.
-      zxlogf(TRACE, "aml-ram: loop shutdown");
+    }
+
+    ZX_ASSERT(jobs.empty());
+
+    if (observed & kCancelSignal) {
+      // Shutdown with no pending work in the local queue.
       return;
     }
+
+    {
+      fbl::AutoLock lock(&lock_);
+      if (requests_.empty()) {
+        // Done with all work. Clear pending-work signal, go back to wait.
+        thread_control_.signal(kWorkPendingSignal, 0);
+        continue;
+      } else {
+        // Some work available. Move it all to the local queue.
+        jobs = std::move(requests_);
+      }
+    }
+
+    while (!jobs.empty()) {
+      Job& job = jobs.front();
+      ram_metrics::BandwidthInfo bpi;
+      status = ReadBandwithCounters(job.config, &bpi);
+      if (status == ZX_OK) {
+        job.completer.ReplySuccess(bpi);
+      } else if (status == ZX_ERR_CANCELED) {
+        // Shutdown case with pending work in local queue. Give back the jobs
+        // to the main thread.
+        RevertJobs(&jobs);
+        return;
+      } else {
+        // Unexpected error. Better log it.
+        zxlogf(ERROR, "aml-ram: read error z %d\n", status);
+        job.completer.ReplyError(status);
+      }
+      jobs.pop_front();
+    }
   };
 }
 
+// Merge back the request jobs from the local jobs in |source| preserving
+// the order of arrival: the last job in |source| is ahead of the
+// first job in |request_|.
+void AmlRam::RevertJobs(std::deque<AmlRam::Job>* source) {
+  fbl::AutoLock lock(&lock_);
+  while (!source->empty()) {
+    requests_.push_front(std::move(source->back()));
+    source->pop_back();
+  }
+}
+
 void AmlRam::Shutdown() {
-  if (shutdown_) {
-    shutdown_.signal(0u, ZX_EVENT_SIGNALED);
+  if (thread_control_) {
+    thread_control_.signal(kWorkPendingSignal, kCancelSignal);
     thread_.join();
-    shutdown_.reset();
+    thread_control_.reset();
+    // Cancel all pending requests. There are no more threads
+    // but we still take the lock to keep lock checker happy.
+    {
+      fbl::AutoLock lock(&lock_);
+      for (auto& request : requests_) {
+        request.completer.Close(ZX_ERR_CANCELED);
+      }
+      requests_.clear();
+    }
   }
 }
 
diff --git a/src/devices/ram/drivers/aml-ram/aml-ram.h b/src/devices/ram/drivers/aml-ram/aml-ram.h
index f9a878e..8f8e101b 100644
--- a/src/devices/ram/drivers/aml-ram/aml-ram.h
+++ b/src/devices/ram/drivers/aml-ram/aml-ram.h
@@ -6,28 +6,65 @@
 #define SRC_DEVICES_RAM_DRIVERS_AML_RAM_AML_RAM_H_
 
 #include <fuchsia/device/llcpp/fidl.h>
+#include <fuchsia/hardware/ram/metrics/llcpp/fidl.h>
 #include <lib/mmio/mmio.h>
+#include <lib/zircon-internal/thread_annotations.h>
 #include <lib/zx/event.h>
 
+#include <deque>
 #include <thread>
 
 #include <ddktl/device.h>
+#include <fbl/mutex.h>
+
+namespace ram_metrics = ::llcpp::fuchsia::hardware::ram::metrics;
 
 namespace amlogic_ram {
 
 // The AmlRam device provides FIDL services directly to applications
 // to query performance counters. For example effective DDR bandwith.
+//
+// There are 4 monitoring channels and each one can agregate up to 64
+// hardware memory ports. NOTE: the word channel and port in this file
+// refer to hardware, not to zircon objects.
+constexpr size_t MEMBW_MAX_CHANNELS = 4u;
+
+// Controls start,stop and if polling or interrupt mode.
+constexpr uint32_t MEMBW_PORTS_CTRL = (0x0020 << 2);
+constexpr uint32_t DMC_QOS_ENABLE_CTRL = (0x01 << 31);
+constexpr uint32_t DMC_QOS_CLEAR_CTRL = (0x01 << 30);
+
+// Returns the granted cycles per channel.
+constexpr uint32_t MEMBW_C0_GRANT_CNT = (0x2b << 2);
+constexpr uint32_t MEMBW_C1_GRANT_CNT = (0x2c << 2);
+constexpr uint32_t MEMBW_C2_GRANT_CNT = (0x2d << 2);
+constexpr uint32_t MEMBW_C3_GRANT_CNT = (0x2e << 2);
+
+// Controls how long to measure cycles for.
+constexpr uint32_t MEMBW_TIMER = (0x002f << 2);
+
+// Controls which ports are assigned to each channel.
+constexpr uint32_t MEMBW_RP[MEMBW_MAX_CHANNELS] = {(0x0021 << 2), (0x0023 << 2), (0x0025 << 2),
+                                                   (0x0027 << 2)};
+
+// Controls wich subports are assinged to each channel.
+constexpr uint32_t MEMBW_SP[MEMBW_MAX_CHANNELS] = {(0x0022 << 2), (0x0024 << 2), (0x0026 << 2),
+                                                   (0x0028 << 2)};
+
+constexpr uint64_t kMinimumCycleCount = 1024 * 512;
+constexpr uint64_t kMaximumCycleCount = 1024 * 1024 * 256;
 
 class AmlRam;
 using DeviceType = ddk::Device<AmlRam, ddk::Suspendable, ddk::Messageable>;
 
-class AmlRam : public DeviceType {
+class AmlRam : public DeviceType, private ram_metrics::Device::Interface {
  public:
   DISALLOW_COPY_AND_ASSIGN_ALLOW_MOVE(AmlRam);
 
   static zx_status_t Create(void* context, zx_device_t* parent);
 
   explicit AmlRam(zx_device_t* parent, ddk::MmioBuffer mmio);
+  ~AmlRam();
   void DdkRelease();
   void DdkSuspend(ddk::SuspendTxn txn);
 
@@ -35,13 +72,31 @@
   zx_status_t DdkMessage(fidl_msg_t* msg, fidl_txn_t* txn);
 
  private:
+  struct Job {
+    ram_metrics::BandwidthMeasurementConfig config;
+    MeasureBandwidthCompleter::Async completer;
+    Job() = delete;
+    Job(ram_metrics::BandwidthMeasurementConfig config, MeasureBandwidthCompleter::Async completer)
+        : config(std::move(config)), completer(std::move(completer)) {}
+  };
+
+  // Implementation of ram_metrics::Device::Interface FIDL service.
+  void MeasureBandwidth(ram_metrics::BandwidthMeasurementConfig config,
+                        MeasureBandwidthCompleter::Sync completer) override;
+
+  zx_status_t ReadBandwithCounters(const ram_metrics::BandwidthMeasurementConfig& config,
+                                   ram_metrics::BandwidthInfo* bpi);
+
   zx_status_t Bind();
   void ReadLoop();
+  void RevertJobs(std::deque<AmlRam::Job>* source);
   void Shutdown();
 
   ddk::MmioBuffer mmio_;
   std::thread thread_;
-  zx::event shutdown_;
+  zx::event thread_control_;
+  fbl::Mutex lock_;
+  std::deque<Job> requests_ TA_GUARDED(lock_);
 };
 
 }  // namespace amlogic_ram