// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "memory_stress.h"
#include <endian.h>
#include <fuchsia/kernel/cpp/fidl.h>
#include <lib/zx/clock.h>
#include <lib/zx/time.h>
#include <stdio.h>
#include <zircon/assert.h>
#include <zircon/status.h>
#include <zircon/syscalls.h>

#include <numeric>
#include <random>
#include <string>
#include <thread>

#include <fbl/span.h>

#include "compiler.h"
#include "memory_patterns.h"
#include "memory_range.h"
#include "memory_stats.h"
#include "profile_manager.h"
#include "src/lib/fxl/strings/string_printf.h"
#include "status.h"
#include "temperature_sensor.h"
#include "util.h"

namespace hwstress {
namespace {

// Create a std::default_random_engine PRNG pre-seeded with a small truly random value.
std::default_random_engine CreateRandomEngine() {
std::random_device rng;
return std::default_random_engine(rng());
}

// Writes a pattern to memory and verifies it is still intact; then writes
// out the complement, and finally verifies the complement has been correctly
// written out.
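//
// For example (a sketch; SimplePattern is defined in memory_patterns.h and
// yields the same 64-bit word on each call):
//
//   TestPatternAndComplement(memory, SimplePattern(0x5555'5555'5555'5555ul));
//
// writes 0x5555... to every word, verifies it, then writes and verifies the
// complement 0xaaaa....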
template <typename PatternGenerator>
void TestPatternAndComplement(MemoryRange* memory, PatternGenerator pattern) {
PatternGenerator original_pattern = pattern;
// Write out the pattern.
WritePattern(memory->span(), pattern);
memory->CleanInvalidateCache();
// Verify the pattern, flipping each word as we progress.
//
// We perform the read/verify/write one word at a time (instead of
// a VerifyPattern/WritePattern pair) to minimise the time between
// verifying the old value and writing the next test pattern.
{
uint64_t* start = memory->words();
size_t words = memory->size_words();
for (size_t i = 0; i < words; i++) {
uint64_t p = pattern();
if (unlikely(start[i] != p)) {
ZX_PANIC("Found memory error: expected 0x%16lx, got 0x%16lx at offset %ld.\n", p, start[i],
i);
}
start[i] = ~p;
}
memory->CleanInvalidateCache();
}
// Verify the pattern again.
VerifyPattern(memory->span(), InvertPattern(original_pattern));
}

// Make a |MemoryWorkload| consisting of writing a pattern to RAM
// and reading it again.
template <typename PatternGenerator>
MemoryWorkload MakePatternWorkload(std::string_view name, PatternGenerator pattern) {
MemoryWorkload result;
result.name = std::string(name);
result.exec = [pattern](StatusLine* /*unused*/, zx::duration /*unused*/, MemoryRange* memory) {
// Write and verify the pattern followed by its negation.
TestPatternAndComplement(memory, pattern);
};
result.memory_type = CacheMode::kCached;
return result;
}

// Repeatedly open/close individual rows on a RAM bank to try to trigger bit errors.
//
// See https://en.wikipedia.org/wiki/Row_hammer for background.
void RowHammer(StatusLine* status, MemoryRange* memory, zx::duration duration, uint64_t pattern) {
constexpr int kAddressesPerIteration = 4;
constexpr int kReadsPerIteration = 1'000'000;
// Set all memory to the desired pattern.
WritePattern(memory->span(), SimplePattern(pattern));
// Create a random number generator that returns a random page index.
uint32_t num_pages = memory->size_bytes() / ZX_PAGE_SIZE;
std::default_random_engine rng = CreateRandomEngine();
std::uniform_int_distribution<uint32_t> random_page(0, num_pages - 1);
// Perform several iterations on different addresses before spending time
// verifying memory.
status->Verbose("Performing RowHammer for %0.2fs with pattern %016lx...",
DurationToSecs(duration), pattern);
zx::time start = zx::clock::get_monotonic();
uint64_t iterations = 0;
while (zx::clock::get_monotonic() - start < duration) {
iterations++;
// Select addresses to hammer.
//
// Our goal is to force the DRAM to open and close a single row many
// times between a refresh cycle. We can do this by reading two
// different rows on the same bank of RAM in quick succession.
//
// Because we don't know the layout of RAM, we select N random
// pages, and read them quickly in succession. There is a good
// chance we'll get lucky and end up with at least two rows in the
// same bank.
volatile uint32_t* targets[kAddressesPerIteration];
uint8_t* data = memory->bytes();
for (auto& target : targets) {
target = reinterpret_cast<volatile uint32_t*>(data + random_page(rng) * ZX_PAGE_SIZE);
}
// Quickly activate the different rows.
UNROLL_LOOP_4
for (uint32_t j = 0; j < kReadsPerIteration; j++) {
UNROLL_LOOP
for (volatile uint32_t* target : targets) {
ForceEval(*target);
}
}
}
zx::time end = zx::clock::get_monotonic();
double seconds_per_iteration = DurationToSecs(end - start) / iterations;
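// Each iteration issues kReadsPerIteration reads of every target, so
// kReadsPerIteration / seconds_per_iteration approximates row activations
// per second; scaling by 64ms estimates activations per DRAM refresh interval.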
status->Verbose("Done. Time per iteration = %0.2fs, row activations per 64ms refresh ~= %0.0f",
seconds_per_iteration,
(kReadsPerIteration / seconds_per_iteration) * (64. / 1000.));
// Ensure memory is still as expected.
VerifyPattern(memory->span(), SimplePattern(pattern));
}

MemoryWorkload MakeRowHammerWorkload(std::string_view name, uint64_t pattern) {
return MemoryWorkload{
.name = std::string(name),
// Execute the main RowHammer function.
.exec =
[pattern](StatusLine* status, zx::duration max_duration, MemoryRange* memory) {
RowHammer(status, memory, /*duration=*/std::min(max_duration, zx::sec(30)), pattern);
},
// Need to use uncached memory to ensure that each access hits main memory.
.memory_type = CacheMode::kUncached,
// We do not run in time proportional to the memory size, so don't
// attempt to report throughput numbers.
.report_throughput = false,
};
}
} // namespace
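
// Verify that |range| contains |pattern|, panicking with details if any
// word does not match.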
template <typename PatternGenerator>
void VerifyPatternOrDie(fbl::Span<uint8_t> range, PatternGenerator pattern) {
std::optional<std::string> result = VerifyPattern(range, pattern);
if (unlikely(result.has_value())) {
ZX_PANIC("Detected memory error: %s\n", result->c_str());
}
}

std::vector<MemoryWorkload> GenerateMemoryWorkloads() {
std::vector<MemoryWorkload> result;
// Simple patterns.
struct BitPattern {
std::string_view name;
uint64_t pattern;
};
for (const auto& pattern : std::initializer_list<BitPattern>{
{"All 0 bits", 0x0000'0000'0000'0000},
{"All 1 bits", 0xffff'ffff'ffff'fffful},
{"Alternating bits (1/2)", 0x5555'5555'5555'5555},
{"Alternating bits (2/2)", 0xaaaa'aaaa'aaaa'aaaa},
{"2 bits on / 2 bits off (1/2)", 0x3333'3333'3333'3333ul},
{"2 bits on / 2 bits off (2/2)", 0xcccc'cccc'cccc'ccccul},
{"4 bits on / 4 bits off (1/2)", 0xf0f0'f0f0'f0f0'f0f0ul},
{"4 bits on / 4 bits off (2/2)", 0x0f0f'0f0f'0f0f'0f0ful},
}) {
result.push_back(MakePatternWorkload(pattern.name, SimplePattern(pattern.pattern)));
}
// 1 in 6 bits set.
//
// Having every 6th bit set ensures that no set bit has a set neighbour to
// its north/south/east/west in the RAM array, assuming that rows are
// a power-of-two size.
auto every_sixth_bit = std::initializer_list<uint64_t>{
0b1000001000001000001000001000001000001000001000001000001000001000,
0b0010000010000010000010000010000010000010000010000010000010000010,
0b0000100000100000100000100000100000100000100000100000100000100000,
};
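// Rotating the three-word pattern by 0..5 bit positions covers all six
// phases of the stride-6 pattern, so every bit position is exercised by
// exactly one of the six workloads below.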
for (int i = 0; i < 6; i++) {
result.push_back(MakePatternWorkload(fxl::StringPrintf("Single bit set 6-bit (%d/6)", i + 1),
MultiWordPattern(RotatePattern(every_sixth_bit, i))));
}
for (int i = 0; i < 6; i++) {
result.push_back(
MakePatternWorkload(fxl::StringPrintf("Single bit clear 6-bit (%d/6)", i + 1),
MultiWordPattern(NegateWords((RotatePattern(every_sixth_bit, i))))));
}
// Random bits.
constexpr int kRandomBitIterations = 10;
for (int i = 0; i < kRandomBitIterations; i++) {
result.push_back(MakePatternWorkload(
fxl::StringPrintf("Random bits (%d/%d)", i + 1, kRandomBitIterations), RandomPattern()));
}
// Row hammer workloads.
result.push_back(MakeRowHammerWorkload("Row hammer, all bits clear", 0x0000'0000'0000'0000ul));
result.push_back(MakeRowHammerWorkload("Row hammer, all bits set", 0xffff'ffff'ffff'fffful));
result.push_back(
MakeRowHammerWorkload("Row hammer, alternating bits (1/2)", 0xaaaa'aaaa'aaaa'aaaaul));
result.push_back(
MakeRowHammerWorkload("Row hammer, alternating bits (2/2)", 0x5555'5555'5555'5555ul));
return result;
}

// Ensure that |result| contains at least |size| bytes of memory, mapped in as mode |mode|.
//
// Will deallocate and reallocate memory as required to achieve this.
zx::status<> ReallocateMemoryAsRequired(CacheMode mode, size_t size,
std::unique_ptr<MemoryRange>* result) {
// If we are already allocated and have the right cache mode, nothing to do.
if (*result != nullptr && result->get()->cache() == mode && result->get()->size_bytes() >= size) {
return zx::ok();
}
// Otherwise, allocate new memory.
result->reset();
zx::status<std::unique_ptr<MemoryRange>> maybe_memory = MemoryRange::Create(size, mode);
if (maybe_memory.is_error()) {
return zx::error(maybe_memory.status_value());
}
*result = std::move(maybe_memory).value();
return zx::ok();
}

fitx::result<std::string, size_t> GetMemoryToTest(const CommandLineArgs& args) {
// Get amount of RAM and free memory in system.
zx::status<fuchsia::kernel::MemoryStats> maybe_stats = GetMemoryStats();
if (maybe_stats.is_error()) {
return fitx::error("Could not fetch free memory.");
}
// If a value was specified, and doesn't exceed total system RAM, use that.
if (args.mem_to_test_megabytes.has_value()) {
size_t requested = MiB(args.mem_to_test_megabytes.value());
if (requested > maybe_stats->total_bytes()) {
return fitx::error(fxl::StringPrintf(
"Specified memory size (%ld bytes) exceeds system memory size (%ld bytes).", requested,
maybe_stats->total_bytes()));
}
return fitx::ok(requested);
}
// If user asked for a percent of total memory, calculate that.
if (args.ram_to_test_percent.has_value()) {
uint64_t total_bytes = maybe_stats->total_bytes();
auto test_bytes =
static_cast<uint64_t>(total_bytes * (args.ram_to_test_percent.value() / 100.));
return fitx::ok(RoundUp(test_bytes, ZX_PAGE_SIZE));
}
// Otherwise, try to calculate a reasonable value based on free memory.
//
// The default memory stress values for Fuchsia are:
// - 300MiB free => Warning
// - 150MiB free => Critical
// - 50MiB free => OOM
//
// We aim to hit just below the warning threshold.
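//
// For example, with 1024MiB free we would test 1024 - 301 = 723MiB,
// leaving the system just under the 300MiB warning threshold while the
// test runs.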
uint64_t free_bytes = maybe_stats->free_bytes();
uint64_t slack = MiB(301);
if (free_bytes < slack + MiB(1)) {
// We don't have 300MiB free: just use 1MiB.
return fitx::ok(MiB(1));
}
return fitx::ok(RoundUp(free_bytes - slack, ZX_PAGE_SIZE));
}

MemoryWorkloadGenerator::MemoryWorkloadGenerator(const std::vector<MemoryWorkload>& workloads,
uint32_t num_cpus)
: num_cpus_(num_cpus) {
ZX_ASSERT(!workloads.empty());
// Copy workloads into workloads_, converting into
// a std::optional<MemoryWorkload> in the process.
workloads_.reserve(workloads.size());
for (const MemoryWorkload& workload : workloads) {
workloads_.emplace_back(workload);
}
// We want to iterate through different workloads and different CPUs. One
// method would be to test 1 CPU through all workloads, or 1 workload
// through all CPUs. Neither is great: ideally, we would like to get
// good coverage of both CPUs and workloads relatively quickly.
//
// To try and quickly maximise coverage, we instead iterate through both
// simultaneously. If we have:
//
// gcd(num_cpus, num_workloads) == 1
//
// then the Chinese Remainder Theorem [1] ensures that after num_cpus
// * num_workloads iterations, we will have covered every (CPU, workload)
// combination.
//
// To ensure that gcd(num_cpus, num_workloads) == 1, we keep adding a number
// of dummy "null" workloads until this criteria is met.
//
// [1] https://en.wikipedia.org/wiki/Chinese_remainder_theorem
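//
// For example, with num_cpus == 4 and 7 workloads after padding,
// iteration n runs workload (n mod 7) on CPU (n mod 4); since
// gcd(4, 7) == 1, all 28 (CPU, workload) pairs occur within 28 iterations.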
while (std::gcd(num_cpus_, workloads_.size()) != 1) {
workloads_.push_back(std::nullopt);
}
}

MemoryWorkloadGenerator::Workload MemoryWorkloadGenerator::Next() {
// Increment |n|, skipping over null workloads.
do {
n_++;
} while (!workloads_[n_ % workloads_.size()].has_value());
return Workload{
.cpu = static_cast<uint32_t>(n_ % num_cpus_),
.workload = workloads_[n_ % workloads_.size()].value(),
};
}

bool StressMemory(StatusLine* status, const CommandLineArgs& args, zx::duration duration,
TemperatureSensor* /*temperature_sensor*/) {
// Parse the amount of memory to test.
fitx::result<std::string, size_t> bytes_to_test = GetMemoryToTest(args);
if (bytes_to_test.is_error()) {
status->Log(bytes_to_test.error_value());
return false;
}
status->Log("Testing %0.2fMiB of memory.", bytes_to_test.value() / static_cast<double>(MiB(1)));
// Create a profile manager.
std::unique_ptr<ProfileManager> profile_manager = ProfileManager::CreateFromEnvironment();
if (profile_manager == nullptr) {
status->Log("Error: could not create profile manager.");
return false;
}
// Get workloads.
std::vector<MemoryWorkload> workloads = GenerateMemoryWorkloads();
MemoryWorkloadGenerator workload_generator{workloads, zx_system_get_num_cpus()};
// Keep looping over the memory tests until we run out of time.
std::unique_ptr<MemoryRange> memory;
uint64_t num_tests = 1;
zx::time end_time = zx::deadline_after(duration);
while (zx::clock::get_monotonic() < end_time) {
MemoryWorkloadGenerator::Workload next = workload_generator.Next();
// Allocate memory for tests.
zx::status<> result =
ReallocateMemoryAsRequired(next.workload.memory_type, bytes_to_test.value(), &memory);
if (result.is_error()) {
status->Log("Failed to reallocate memory: %s", result.status_string());
return false;
}
// Log start of test.
status->Set("Test %4ld: CPU %2d : %s", num_tests, next.cpu, next.workload.name.c_str());
// Switch execution to the correct CPU.
profile_manager->SetThreadAffinity(*zx::thread::self(), 1u << next.cpu);
// Execute the workload.
zx::time test_start = zx::clock::get_monotonic();
next.workload.exec(status, /*max_duration=*/(end_time - zx::clock::get_monotonic()),
memory.get());
zx::duration test_duration = zx::clock::get_monotonic() - test_start;
// Calculate test time and throughput.
std::string throughput;
if (next.workload.report_throughput) {
throughput =
fxl::StringPrintf(", throughput: %0.2f MiB/s",
memory->size_bytes() / DurationToSecs(test_duration) / 1024. / 1024.);
}
status->Log("Test %4ld: CPU %2d : %s: %0.3fs%s", num_tests, next.cpu,
next.workload.name.c_str(), DurationToSecs(test_duration), throughput.c_str());
num_tests++;
}
return true;
}

} // namespace hwstress