| // Copyright 2025 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <err.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/ioctl.h> |
| #include <sys/mman.h> |
| #include <sys/syscall.h> |
| #include <sys/types.h> |
| #include <time.h> |
| #include <unistd.h> |
| |
| #include <gtest/gtest.h> |
| #include <linux/perf_event.h> |
| |
| #include "src/starnix/tests/syscalls/cpp/syscall_matchers.h" |
| #include "test_helper.h" |
| |
| namespace { |
| |
| // From https://man7.org/linux/man-pages/man2/perf_event_open.2.html |
| struct read_format_data { |
| uint64_t value; /* The value of the event */ |
| uint64_t time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ |
| uint64_t time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ |
| uint64_t id; /* if PERF_FORMAT_ID */ |
| uint64_t lost; /* if PERF_FORMAT_LOST */ |
| }; |
| |
| // We use this struct when we only request `time_running` in the `read_format`. |
| // Because the struct is order-dependent we don't want the `time_running` param to |
| // get written to `time_enabled`. |
| struct read_format_data_time_running { |
| uint64_t value; |
| uint64_t time_running; |
| }; |
| |
| struct read_format_data_id { |
| uint64_t value; |
| uint64_t id; |
| }; |
| |
| // Valid example inputs to use for tests when we aren't testing these values |
| // but still need to pass them in. |
| // TODO(https://fxbug.dev/409621963): implement permissions logic for any pid > 0. |
| const int32_t example_pid = 0; |
| const int32_t example_cpu = -1; // Keep this as -1 for now so that it includes events on ANY CPU. |
| // TODO(https://fxbug.dev/409619971): handle cases other than -1. |
| const int example_group_fd = -1; |
| const long example_flags = 0; |
| perf_event_attr example_attr = {0, 0, 0, {}, 0, 0, 0}; |
| const int32_t from_nanos = 1000000000; |
| const int sample_duration = 100000; // 100 ms |
| const int poll_duration = 250000; // 250 ms |
| const int read_retries = 5; |
| |
| // Returns an example perf_event_attr where none of the values matter |
| // except for the read_format, which is passed in. |
| perf_event_attr attr_with_read_format(uint64_t read_format) { |
| return {PERF_TYPE_SOFTWARE, PERF_ATTR_SIZE_VER1, PERF_COUNT_SW_CPU_CLOCK, {}, 0, read_format, 0}; |
| } |
| |
| perf_event_attr attr_with_read_format(uint64_t read_format, uint64_t disabled) { |
| return {PERF_TYPE_SOFTWARE, |
| PERF_ATTR_SIZE_VER1, |
| PERF_COUNT_SW_CPU_CLOCK, |
| {}, |
| 0, |
| read_format, |
| disabled}; |
| } |
| |
| // Write wrapper because there isn't one per the man7 page. |
| int32_t sys_perf_event_open(perf_event_attr* attr, int32_t pid, int32_t cpu, int group_fd, |
| unsigned long flags) { |
| // Explicitly cast to int32_t because perf_event_open() returns `int`. Also, `FdNumber` is i32. |
| return static_cast<int32_t>(syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags)); |
| } |
| |
| TEST(PerfEventOpenTest, ValidInputsSucceed) { |
| // The file descriptor value that is returned is not guaranteed to be a specific number. |
| // Just check that's not -1 (error). |
| // TODO(https://fxbug.dev/394960158): Change this test when we have something better |
| // to test. |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = sys_perf_event_open(&example_attr, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| EXPECT_NE(file_descriptor, -1); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, InvalidPidAndCpuFails) { |
| int32_t pid = -1; // Invalid |
| int32_t cpu = -1; // Invalid |
| |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&example_attr, pid, cpu, example_group_fd, example_flags); |
| EXPECT_THAT(file_descriptor, SyscallFailsWithErrno(EINVAL)); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, ReadEventWithTimeEnabledSucceeds) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; |
| perf_event_attr attr = attr_with_read_format(read_format); |
| |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| EXPECT_NE(file_descriptor, -1); |
| |
| // read() on the file descriptor should return the number of bytes written to buffer, |
| // and the buffer should contain read_format_data information for that event. |
| char buffer[16]; |
| uint64_t read_length = syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| EXPECT_EQ(read_length, sizeof(buffer)); |
| |
| read_format_data data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| // Check that the time_enabled param in secs is smaller than the current time. |
| uint64_t time_enabled_secs = data.time_enabled / from_nanos; |
| timespec current_time; |
| clock_gettime(CLOCK_MONOTONIC, ¤t_time); |
| uint64_t current_time_secs = current_time.tv_sec; |
| |
| EXPECT_LE(time_enabled_secs, current_time_secs); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, ReadEventWithTimeRunningSucceeds) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_RUNNING; |
| perf_event_attr attr = attr_with_read_format(read_format); |
| |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| EXPECT_NE(file_descriptor, -1); |
| |
| // read() on the file descriptor should return the number of bytes written to buffer, |
| // and the buffer should contain read_format_data information for that event. |
| char buffer[16]; |
| uint64_t read_length = syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| EXPECT_EQ(read_length, sizeof(buffer)); |
| |
| read_format_data_time_running data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| uint64_t time_running_secs = data.time_running / from_nanos; |
| timespec current_time; |
| clock_gettime(CLOCK_MONOTONIC, ¤t_time); |
| uint64_t current_time_secs = current_time.tv_sec; |
| |
| EXPECT_LE(time_running_secs, current_time_secs); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, ReadEventWithPerfFormatIdSucceeds) { |
| if (test_helper::HasSysAdmin()) { |
| uint64_t read_format = PERF_FORMAT_ID; |
| perf_event_attr attr = attr_with_read_format(read_format); |
| uint64_t ids[3]; |
| |
| for (uint64_t i = 0; i < 3; i++) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| EXPECT_NE(file_descriptor, -1); |
| |
| char buffer[16]; |
| read_format_data_id data; |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| ids[i] = data.id; |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| |
| EXPECT_LT(ids[0], ids[1]); |
| EXPECT_LT(ids[1], ids[2]); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, ReadEventWithTimeEnabledAndRunningSucceeds) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; |
| perf_event_attr attr = attr_with_read_format(read_format); |
| |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| EXPECT_NE(file_descriptor, -1); |
| |
| char buffer[24]; |
| uint64_t read_length = syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| EXPECT_EQ(read_length, sizeof(buffer)); |
| |
| read_format_data data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| // Check that the params are smaller than the current time. |
| uint64_t time_enabled_secs = data.time_enabled / from_nanos; |
| uint64_t time_running_secs = data.time_running / from_nanos; |
| timespec current_time; |
| clock_gettime(CLOCK_MONOTONIC, ¤t_time); |
| uint64_t current_time_secs = current_time.tv_sec; |
| |
| EXPECT_EQ(time_enabled_secs, time_running_secs); |
| EXPECT_LE(time_enabled_secs, current_time_secs); |
| EXPECT_LE(time_running_secs, current_time_secs); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, ReadEventWithBufferTooSmallFails) { |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = sys_perf_event_open(&example_attr, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| EXPECT_NE(file_descriptor, -1); |
| |
| // Create buffer that is too small for the read() call to put stuff in. read() should |
| // return ENOSPC. |
| char buffer[7]; |
| EXPECT_THAT(syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)), |
| SyscallFailsWithErrno(ENOSPC)); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // When disabled is passed in the initial attr params. |
| TEST(PerfEventOpenTest, WhenDisabledEventCountShouldBeZero) { |
| if (test_helper::HasSysAdmin()) { |
| perf_event_attr attr = {0, 0, 0, {}, 0, 0, 1}; |
| |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| |
| char buffer[8]; |
| uint64_t read_length = syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| EXPECT_EQ(read_length, sizeof(buffer)); |
| |
| read_format_data data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| unsigned long count = 0; |
| EXPECT_EQ(data.value, count); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // When enabled is passed in the initial attr params. |
| TEST(PerfEventOpenTest, WhenEnabledEventCountShouldBeOne) { |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = sys_perf_event_open(&example_attr, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| |
| char buffer[8]; |
| uint64_t read_length = syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| EXPECT_EQ(read_length, sizeof(buffer)); |
| |
| read_format_data data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| EXPECT_GT(data.value, (uint64_t)0); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, SettingIoctlDisabledCallWorks) { |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = sys_perf_event_open(&example_attr, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_DISABLE), -1); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, SettingIoctlEnabledCallWorks) { |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = sys_perf_event_open(&example_attr, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_ENABLE), -1); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, WhenResetAndDisabledEventCountShouldBeZero) { |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = sys_perf_event_open(&example_attr, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| |
| char buffer[8]; |
| syscall(__NR_read, file_descriptor, buffer, |
| sizeof(buffer)); // Read once: the test rf_value = 1 |
| uint64_t read_length = |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); // Read twice: rf_value = 2 |
| EXPECT_EQ(read_length, sizeof(buffer)); |
| |
| read_format_data data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| // Host test will return a real value, which is bigger. |
| unsigned long count = 2; |
| EXPECT_GE(data.value, count); |
| |
| // Disable and reset. Count value should now be 0 and stay there. |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_DISABLE), -1); |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_RESET), -1); |
| count = 0; |
| |
| read_length = syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| EXPECT_EQ(read_length, sizeof(buffer)); |
| |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| EXPECT_EQ(data.value, count); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // Example: |
| // - Start perf_event_open with disabled = 0 (enabled) |
| // - Do an event |
| // - Call IOC_DISABLE |
| // - Do a read() which will return a time_running (for that segment) |
| // - Do an event |
| // - Do a read() which will return the same time_running (because segment didn't change) |
| TEST(PerfEventOpenTest, WhenDisabledTimeRunningAndTimeEnabledAreCorrect) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_TOTAL_TIME_ENABLED; |
| perf_event_attr attr = attr_with_read_format(read_format); |
| |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| |
| char buffer[24]; |
| read_format_data data; |
| |
| printf("This is an event\n"); |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_DISABLE), -1); |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| uint64_t time_running = data.time_running; |
| uint64_t time_enabled = data.time_enabled; |
| |
| printf("This is an event\n"); |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| uint64_t time_running_after_disable = data.time_running; |
| uint64_t time_enabled_after_disable = data.time_enabled; |
| |
| EXPECT_EQ(time_running, time_running_after_disable); |
| EXPECT_EQ(time_enabled, time_enabled_after_disable); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // Example: |
| // - Start perf_event_open with disabled = 0 (enabled) |
| // - Do an event |
| // - Do a read() which will return a time_running |
| // - Do an event |
| // - Do a read() which will return a larger time_running |
| TEST(PerfEventOpenTest, WhenEnabledTimeRunningIsCorrect) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_RUNNING; |
| perf_event_attr attr = attr_with_read_format(read_format); |
| |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| char buffer[16]; |
| read_format_data_time_running data; |
| |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_ENABLE), -1); |
| printf("This is an event\n"); |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| uint64_t time_running = data.time_running; |
| |
| // Check that time later is bigger. |
| printf("This is an event\n"); |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| uint64_t time_running_later = data.time_running; |
| |
| // This fails for the host test because time_running is always 0. |
| // TODO(https://fxbug.dev/413146816): figure out what the real time_running is. |
| EXPECT_LT(time_running, time_running_later); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // Example: |
| // - Start perf_event_open with disabled = 0 (enabled) |
| // - Do an event |
| // - Do a read() which will return a time_running |
| // - Do an event |
| // - Do a read() which will return a larger time_running |
| TEST(PerfEventOpenTest, WhenEnabledTimeEnabledIsCorrect) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; |
| perf_event_attr attr = attr_with_read_format(read_format); |
| |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| char buffer[16]; |
| read_format_data data; |
| |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_ENABLE), -1); |
| printf("This is an event\n"); |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| uint64_t time_enabled = data.time_enabled; |
| |
| // Check that time later is bigger. |
| printf("This is an event\n"); |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| uint64_t time_enabled_later = data.time_enabled; |
| |
| EXPECT_LT(time_enabled, time_enabled_later); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // Example: |
| // - Start perf_event_open with disabled = 1 (disabled) |
| // - Do an event |
| // - Do a read() which will return a time_running and time_enabled of 0 (ensures initialization) |
| TEST(PerfEventOpenTest, WhenDisabledTotalTimeRunningAndEnabledAreZero) { |
| if (test_helper::HasSysAdmin()) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_TOTAL_TIME_ENABLED; |
| perf_event_attr attr = attr_with_read_format(read_format, 1); |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| |
| printf("This is an event\n"); |
| |
| char buffer[24]; |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| |
| read_format_data data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| EXPECT_EQ(data.time_running, (uint64_t)0); |
| EXPECT_EQ(data.time_enabled, (uint64_t)0); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // Example: |
| // - Start perf_event_open with disabled = 1 (disabled) |
| // - Make multiple IOC_DISABLE calls |
| // - Do an event |
| // - Do a read() which will return a time_running of 0 (ensures it was initialized) |
| TEST(PerfEventOpenTest, MultipleDisablesDoesNotChangeTime) { |
| if (test_helper::HasSysAdmin()) { |
| uint64_t read_format = PERF_FORMAT_TOTAL_TIME_RUNNING; |
| perf_event_attr attr = attr_with_read_format(read_format, 1); |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_DISABLE), -1); |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_DISABLE), -1); |
| |
| printf("This is an event\n"); |
| |
| char buffer[16]; |
| syscall(__NR_read, file_descriptor, buffer, sizeof(buffer)); |
| |
| read_format_data_time_running data; |
| std::memcpy(&data, buffer, sizeof(buffer)); |
| |
| EXPECT_EQ(data.time_running, (uint64_t)0); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, ReadingFirstEightBytesCanReturnCountAsALong) { |
| if (test_helper::HasSysAdmin()) { |
| int32_t file_descriptor = sys_perf_event_open(&example_attr, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| |
| // Both char buffer[8] (tested in previous tests) and long long (8 bytes) should work. |
| long long count; |
| syscall(__NR_read, file_descriptor, &count, sizeof(count)); |
| |
| EXPECT_GE(count, 1); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| // Here is a full example of a counting case. |
| TEST(PerfEventOpenTest, CountingCPUClockSucceeds) { |
| if (test_helper::HasSysAdmin()) { |
| perf_event_attr attr; |
| memset(&attr, 0, sizeof(attr)); |
| attr.type = PERF_TYPE_SOFTWARE; |
| attr.size = sizeof(attr); |
| attr.config = PERF_COUNT_SW_CPU_CLOCK; |
| attr.disabled = 1; |
| |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, -1, example_group_fd, example_flags); |
| EXPECT_NE(file_descriptor, -1); |
| |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_RESET), -1); |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_ENABLE), -1); |
| |
| printf("This is an event\n"); |
| |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_DISABLE), -1); |
| |
| long long count; |
| syscall(__NR_read, file_descriptor, &count, sizeof(count)); |
| |
| // TODO(https://fxbug.dev/402938671): this is expected to be 0 right now |
| // because real value hasn't been implemented. When running on host, it will give |
| // a real number (~6000-10000). Update this when we get a real instruction count. |
| EXPECT_GT(count, -1); |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| } |
| } |
| |
| /* Below are sampling tests */ |
| |
| // Valid attributes for sampling. On Linux for the first sampling event you'll get something like: |
| // perf_event_header { type = 9, size = 16, misc = 2 }. |
| perf_event_attr example_sampling_attr(uint64_t sample_type) { |
| perf_event_attr attr; |
| memset(&attr, 0, sizeof(attr)); |
| attr.type = PERF_TYPE_HARDWARE; |
| attr.size = sizeof(attr); |
| attr.config = PERF_COUNT_HW_INSTRUCTIONS; |
| // Elects sampling instead of counting. Since right here one |
| // event is 1 nanoseconds. It represent take one sample every |
| // 250'000 nanoseconds |
| attr.sample_period = 250'000; |
| attr.sample_type = sample_type; |
| attr.disabled = 1; // Initiate as DISABLED as per Perfetto code use-case. |
| attr.exclude_user = 0; // Necessary otherwise we get zeros for sampling. |
| attr.exclude_kernel = 1; |
| attr.sample_id_all = 1; |
| |
| return attr; |
| } |
| |
| // TODO(https://fxbug.dev/398914921): The Linux version of this test will fail because |
| // we are currently testing against semi-hardcoded values in the Starnix implementation. |
| // Use better EXPECT statements when we grab real values. |
| // |
| // We avoid checking the absolute value for sample id, because its value rely on a global |
| // counter that might be updated through other tests. We issue multiple perf_event_open() |
| // syscall in this test and validate sample_id got updated for different event group. |
| TEST(PerfEventOpenTest, SampleIdIsValid) { |
| if (test_helper::HasSysAdmin()) { |
| perf_event_attr attr_task_clock = |
| example_sampling_attr(PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_ID); |
| attr_task_clock.type = PERF_TYPE_SOFTWARE; |
| attr_task_clock.config = PERF_COUNT_SW_TASK_CLOCK; |
| |
| perf_event_attr attr_cpu_clock = example_sampling_attr(PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_ID); |
| attr_task_clock.type = PERF_TYPE_SOFTWARE; |
| attr_cpu_clock.config = PERF_COUNT_SW_CPU_CLOCK; |
| |
| int32_t fd_task_1 = sys_perf_event_open(&attr_task_clock, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| int32_t fd_task_2 = |
| sys_perf_event_open(&attr_task_clock, example_pid, example_cpu, fd_task_1, example_flags); |
| int32_t fd_cpu_1 = sys_perf_event_open(&attr_cpu_clock, example_pid, example_cpu, |
| example_group_fd, example_flags); |
| int32_t fd_cpu_2 = |
| sys_perf_event_open(&attr_cpu_clock, example_pid, example_cpu, fd_cpu_1, example_flags); |
| const uint32_t invalid = -1; |
| |
| auto get_sample_id = [](int32_t fd) { |
| int num_pages = 2; |
| size_t data_size = num_pages * getpagesize(); |
| size_t buffer_size = getpagesize() + data_size; |
| void* address = mmap(nullptr, buffer_size, PROT_READ, MAP_SHARED, fd, 0); |
| EXPECT_NE(address, MAP_FAILED); |
| |
| EXPECT_NE(syscall(__NR_ioctl, fd, PERF_EVENT_IOC_ENABLE), -1); |
| printf("This is an event - start sampling for %u ms \n", sample_duration); |
| usleep(sample_duration); |
| EXPECT_NE(syscall(__NR_ioctl, fd, PERF_EVENT_IOC_DISABLE), -1); |
| EXPECT_NE(syscall(__NR_close, fd), EXIT_FAILURE); |
| |
| uint64_t sample_id = invalid; |
| uint64_t id = invalid; |
| bool read_samples = false; |
| int retries = 0; |
| perf_event_mmap_page* metadata = (perf_event_mmap_page*)address; |
| |
| while (!read_samples && retries < read_retries) { |
| // Note: for the purposes of this test we are only reading the first sample. |
| // Thus we don't make use of data_head or data_tail at the moment. |
| // TODO(https://fxbug.dev/460203776): update data_head. |
| // TODO(https://fxbug.dev/448762912): update data_tail. |
| char* record_start = static_cast<char*>(address) + metadata->data_offset; |
| |
| // Verify that we got samples written by checking the first sample's metadata. |
| perf_event_header* header = (perf_event_header*)record_start; |
| |
| // If no sampling data, wait poll_duration to potentially collect data and retry. |
| if (!read_samples) { |
| if (header->type == 0) { |
| usleep(poll_duration); |
| retries += 1; |
| continue; |
| } else { |
| read_samples = true; |
| } |
| } |
| |
| // Otherwise, we do have at least 1 sample. Verify the first sample_id. |
| char* record_details_start = record_start + sizeof(perf_event_header); |
| struct perf_record_sample { |
| uint64_t sample_id; |
| uint64_t id; |
| }; |
| struct perf_record_sample* record_details = |
| (struct perf_record_sample*)record_details_start; |
| sample_id = record_details->sample_id; |
| id = record_details->id; |
| EXPECT_EQ(sample_id, id); |
| } |
| if (retries > 0) { |
| printf("Retried reading sample data %u times\n", retries); |
| } |
| EXPECT_EQ(syscall(__NR_munmap, address, buffer_size), 0); |
| return sample_id; |
| }; |
| |
| uint64_t task_id_1 = get_sample_id(fd_task_1); |
| uint64_t task_id_2 = get_sample_id(fd_task_2); |
| uint64_t cpu_id_1 = get_sample_id(fd_cpu_1); |
| uint64_t cpu_id_2 = get_sample_id(fd_cpu_2); |
| |
| EXPECT_NE(invalid, task_id_1); |
| EXPECT_EQ(task_id_1, task_id_2); |
| EXPECT_EQ(cpu_id_1, cpu_id_2); |
| EXPECT_LT(task_id_1, cpu_id_1); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, MmapMetadataPageIsValid) { |
| if (test_helper::HasSysAdmin()) { |
| perf_event_attr attr = example_sampling_attr(PERF_SAMPLE_TIME); |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| |
| int num_pages = 2; |
| size_t data_size = num_pages * getpagesize(); |
| // Ring buffer size, defined to be 1 + 2^n pages per the docs. |
| size_t buffer_size = getpagesize() + data_size; |
| |
| // mmap() returns the address of the mapping. Note you MUST use MAP_SHARED because you're |
| // doing kernel and user stuff. The offset has to be 0 to access the metadata page (first page). |
| void* address = mmap(nullptr, buffer_size, PROT_READ, MAP_SHARED, file_descriptor, 0); |
| |
| // Address should not be 0xffffffffffffffff. |
| EXPECT_NE(address, MAP_FAILED); |
| |
| // Docs say you can close here before reading anything. |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| |
| char buffer[buffer_size]; |
| EXPECT_EQ(buffer_size, sizeof(buffer)); |
| |
| // Verify metadata page has valid/reasonable info. |
| // Don't need to memcopy. Just read directly. |
| perf_event_mmap_page* metadata = (perf_event_mmap_page*)address; |
| EXPECT_LT(metadata->version, (uint32_t)10); |
| EXPECT_LT(metadata->compat_version, (uint32_t)10); |
| EXPECT_EQ(metadata->lock % 2, (uint32_t)0); |
| EXPECT_NE(metadata->index, (uint32_t)-1); |
| EXPECT_NE(metadata->offset, (int64_t)-1); |
| EXPECT_EQ(metadata->capabilities, (uint64_t)30); |
| EXPECT_EQ(metadata->cap_user_time, (uint64_t)1); |
| EXPECT_EQ(metadata->time_enabled, (uint64_t)0); |
| EXPECT_EQ(metadata->time_running, (uint64_t)0); |
| // Verify that there is a sample to read, this must be > 0. |
| EXPECT_GE(metadata->data_head, (uint64_t)0); |
| EXPECT_EQ(metadata->data_tail, (uint64_t)0); |
| EXPECT_EQ(metadata->data_offset, (uint64_t)getpagesize()); |
| EXPECT_EQ(metadata->data_size, (uint64_t)data_size); |
| EXPECT_EQ(syscall(__NR_munmap, address, buffer_size), 0); |
| } |
| } |
| |
| // TODO(https://fxbug.dev/398914921): The Linux version of this test will fail because |
| // we are currently testing against semi-hardcoded values in the Starnix implementation. |
| // Use better EXPECT statements when we grab real values. |
| TEST(PerfEventOpenTest, MmapFirstRecordPageIsValid) { |
| if (test_helper::HasSysAdmin()) { |
| perf_event_attr attr = |
| example_sampling_attr(PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID | |
| PERF_SAMPLE_ID | PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_PERIOD); |
| int32_t file_descriptor = |
| sys_perf_event_open(&attr, example_pid, example_cpu, example_group_fd, example_flags); |
| |
| int num_pages = 2; |
| size_t data_size = num_pages * getpagesize(); |
| // Ring buffer size, defined to be 1 + 2^n pages per the docs. |
| size_t buffer_size = getpagesize() + data_size; |
| |
| // mmap() returns the address of the mapping. Note you MUST use MAP_SHARED because you're |
| // doing kernel and user stuff. |
| // The offset has to be 0 to access the metadata page (first page). |
| void* address = mmap(nullptr, buffer_size, PROT_WRITE, MAP_SHARED, file_descriptor, 0); |
| // Address should not be 0xffffffffffffffff. |
| EXPECT_NE(address, MAP_FAILED); |
| |
| char buffer[buffer_size]; |
| EXPECT_EQ(buffer_size, sizeof(buffer)); |
| |
| // Verify metadata page has valid/reasonable info. |
| // Don't need to memcopy. Just read directly. |
| perf_event_mmap_page* metadata = (perf_event_mmap_page*)address; |
| EXPECT_LT(metadata->version, (uint32_t)10); |
| EXPECT_LT(metadata->compat_version, (uint32_t)10); |
| EXPECT_EQ(metadata->lock % 2, (uint32_t)0); |
| EXPECT_NE(metadata->index, (uint32_t)-1); |
| EXPECT_NE(metadata->offset, (int64_t)-1); |
| EXPECT_EQ(metadata->capabilities, (uint64_t)30); |
| EXPECT_EQ(metadata->cap_user_time, (uint64_t)1); |
| EXPECT_EQ(metadata->time_enabled, (uint64_t)0); |
| EXPECT_EQ(metadata->time_running, (uint64_t)0); |
| // Verify that there is a sample to read, this must be > 0. |
| EXPECT_GT(metadata->data_head, (uint64_t)0); |
| EXPECT_EQ(metadata->data_tail, (uint64_t)0); |
| EXPECT_EQ(metadata->data_offset, (uint64_t)getpagesize()); |
| EXPECT_EQ(metadata->data_size, data_size); |
| |
| // Start sampling. |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_ENABLE), -1); |
| printf("This is an event - start sampling for %u ms \n", sample_duration); |
| usleep(sample_duration); |
| |
| // End sampling. |
| EXPECT_NE(syscall(__NR_ioctl, file_descriptor, PERF_EVENT_IOC_DISABLE), -1); |
| // As mentioned in the docs, closing the file descriptor does not invalidate |
| // the mmap() mapping. |
| EXPECT_NE(syscall(__NR_close, file_descriptor), EXIT_FAILURE); |
| |
| // Start polling every 250 ms to read sampling data. If after 5 retries nothing |
| // has been read, return that sampling has failed. |
| // 250 ms poll is taken from |
| // https://cs.opensource.google/fuchsia/fuchsia/+/main:third_party/perfetto/src/profiling/perf/event_config_unittest.cc;l=96 |
| bool read_samples = false; |
| int retries = 0; |
| while (!read_samples && retries < read_retries) { |
| // Start reading next page, which is the first sampling data page. From there |
| // you can keep iterating to read each sample. Layout: |
| // |
| // The whole object is a Record, comprised of a Header and a RecordDetails: |
| // ------ <-- record_start, start of the header and the sample. |
| // | | |
| // | | perf_event_header size (always 8 bytes) |
| // | | |
| // | --- <-- record_details_start, start of the record_details. |
| // | | |
| // | | varying size based on `perf_event_header->type` |
| // | | |
| // | | |
| // ------ |
| |
| // Starting address for the records page(s). |
| char* record_start = |
| static_cast<char*>(address) + metadata->data_offset + metadata->data_tail; |
| |
| // This is the counter that gets incremented after reading each record. |
| uint64_t curr_pointer = metadata->data_tail; |
| |
| // TODO(https://fxbug.dev/433751865): figure out why we only get 0s after a few loops. |
| // This should say `while (curr_pointer < metadata->data_head)` but we are only getting |
| // 2 samples reliably correctly (rest are 0s or misaligned). Will investigate in follow-up. |
| for (int i = 0; i < 2; i++) { |
| // Parse header. |
| perf_event_header* header = (perf_event_header*)record_start; |
| |
| // If no sampling data, wait poll_duration to potentially collect data and retry. |
| if (!read_samples) { |
| if (header->type == 0) { |
| usleep(poll_duration); |
| retries += 1; |
| break; |
| } else { |
| read_samples = true; |
| } |
| } |
| |
| // Increment by sample size, so that we can read the next sample in the next iteration. |
| EXPECT_EQ(header->type, PERF_RECORD_SAMPLE /* 9 */); |
| EXPECT_THAT(header->misc, testing::AnyOf(testing::Eq(PERF_RECORD_MISC_KERNEL) /* 1 */, |
| testing::Eq(PERF_RECORD_MISC_USER) /* 2 */)); |
| EXPECT_GE(header->size, (uint16_t)8); // Size of the whole sample, INCLUDING THIS HEADER. |
| |
| // Now that we know the type, we can roll past the perf_event_header |
| // and read the rest of the struct, which is different for each type. |
| curr_pointer += header->size; |
| |
| // Parse record details. |
| char* record_details_start = record_start + sizeof(perf_event_header); |
| |
| // This is a subset of the real perf_record_sample which we will implement later. |
| struct perf_record_sample { |
| uint64_t sample_id; |
| uint64_t ip; |
| uint32_t pid; |
| uint32_t tid; |
| uint64_t id; |
| uint64_t sample_period; |
| uint64_t nr; |
| }; |
| |
| struct perf_record_sample* record_details = |
| (struct perf_record_sample*)record_details_start; |
| EXPECT_GE(record_details->ip, (uint64_t)1); |
| EXPECT_GE(record_details->pid, (uint64_t)0); |
| EXPECT_GE(record_details->tid, (uint64_t)1); |
| EXPECT_EQ(record_details->id, record_details->sample_id); |
| EXPECT_GE(record_details->sample_period, (uint64_t)250'000); |
| // On average we are getting ~100 samples for 25 ms hardcoded sample duration. |
| EXPECT_GE(record_details->nr, (uint64_t)1); |
| EXPECT_LT(record_details->nr, (uint64_t)200); |
| |
| // Advance curr_ptr by perf_record_sample (minus ips[nr]). |
| curr_pointer += sizeof(perf_record_sample); |
| |
| // Read the next param of perf_record_sample ips[nr]. When we created the |
| // struct, we don't know the size of `ips`. So we iterate over `nr` times. |
| uint64_t number_of_ips = record_details->nr; |
| uint64_t* ips_start = |
| reinterpret_cast<uint64_t*>(record_details_start + sizeof(perf_record_sample)); |
| std::span<uint64_t> ips{ips_start, static_cast<std::size_t>(number_of_ips)}; |
| for (uint64_t ip : ips) { |
| if (ip == 0) { |
| EXPECT_EQ(ip, record_details->ip); |
| } else { |
| // TODO(https://fxbug.dev/433751865): better way to check validity. |
| EXPECT_GT(ip, (uint64_t)0); |
| } |
| } |
| |
| // Advance counter. |
| curr_pointer += ips.size_bytes(); |
| // See TODO above about using curr_pointer. Just putting EXPECT here so that this will |
| // build. |
| EXPECT_GT(curr_pointer, metadata->data_tail); |
| } |
| } |
| if (retries > 0) { |
| printf("Retried reading sample data %u times\n", retries); |
| } |
| EXPECT_EQ(syscall(__NR_munmap, address, buffer_size), 0); |
| } |
| } |
| |
| TEST(PerfEventOpenTest, GroupLeaderCleanup) { |
| if (test_helper::HasSysAdmin()) { |
| perf_event_attr attr_a = {}; |
| attr_a.type = PERF_TYPE_SOFTWARE; |
| attr_a.config = PERF_COUNT_SW_CPU_CLOCK; |
| |
| perf_event_attr attr_b = {}; |
| attr_b.type = PERF_TYPE_SOFTWARE; |
| attr_b.config = PERF_COUNT_SW_TASK_CLOCK; |
| |
| // Create group A. |
| int fd_a = sys_perf_event_open(&attr_a, example_pid, example_cpu, -1, 0); |
| EXPECT_NE(fd_a, -1); |
| |
| // Create group B. |
| int fd_b = sys_perf_event_open(&attr_b, example_pid, example_cpu, -1, 0); |
| EXPECT_NE(fd_b, -1); |
| |
| // Add an event to group A. |
| int fd_a2 = sys_perf_event_open(&attr_a, example_pid, example_cpu, fd_a, 0); |
| EXPECT_NE(fd_a2, -1); |
| |
| // Close the leader of group A. |
| EXPECT_NE(syscall(__NR_close, fd_a), EXIT_FAILURE); |
| |
| // Try to add another event to group A. This should fail. |
| EXPECT_THAT(sys_perf_event_open(&attr_a, example_pid, example_cpu, fd_a, 0), |
| SyscallFailsWithErrno(EBADF)); |
| |
| // Add an event to group B. This should succeed. |
| int fd_b2 = sys_perf_event_open(&attr_b, example_pid, example_cpu, fd_b, 0); |
| EXPECT_NE(fd_b2, -1); |
| |
| // Cleanup. |
| EXPECT_NE(syscall(__NR_close, fd_a2), EXIT_FAILURE); |
| EXPECT_NE(syscall(__NR_close, fd_b), EXIT_FAILURE); |
| EXPECT_NE(syscall(__NR_close, fd_b2), EXIT_FAILURE); |
| } |
| } |
| |
| } // namespace |