| /* |
| * Copyright © 2020-2021 Collabora, Ltd. |
| * Author: Antonio Caggiano <antonio.caggiano@collabora.com> |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #pragma once |
| |
| #include <pps/pps_driver.h> |
| |
/// Forward declaration of the C perf query descriptor. Only a pointer to it is stored in this
/// header (see IntelDriver::selected_query), so the full definition is not required here.
extern "C" {
struct intel_perf_query_info;
}
| |
| namespace pps |
| { |
| |
/// Forward declaration: this header only holds IntelPerf through a std::unique_ptr,
/// so the complete type is not needed here.
class IntelPerf;
| |
/// @brief Variable length sequence of bytes generated by Intel Observation Architecture (OA)
struct PerfRecord {
   /// Timestamp in the GPU clock domain.
   /// Zero-initialized so that a default-constructed record never carries an
   /// indeterminate value; aggregate initialization `PerfRecord{ts, bytes}` still works.
   uint64_t timestamp = 0;

   /// drm_i915_perf_record_header + report data
   std::vector<uint8_t> data;
};
| |
| /// @brief PPS Driver implementation for Intel graphics devices. |
| /// When sampling it may collect multiple perf-records at once. Each perf-record holds multiple |
| /// counter values. Those values are continuously incremented by the GPU. In order to get a delta, |
| /// the driver computes an _accumulation_ (`last_perf_record - previous_perf_record`). |
| /// For optimization purposes, it might ignore some perf-records, considering only those |
| /// perf-records close to the boundary of the sampling period range. |
| class IntelDriver : public Driver |
| { |
| public: |
| IntelDriver(); |
| ~IntelDriver(); |
| |
| uint64_t get_min_sampling_period_ns() override; |
| bool init_perfcnt() override; |
| void enable_counter(uint32_t counter_id) override; |
| void enable_all_counters() override; |
| void enable_perfcnt(uint64_t sampling_period_ns) override; |
| void disable_perfcnt() override; |
| bool dump_perfcnt() override; |
| uint64_t next() override; |
| uint32_t gpu_clock_id() const override; |
| uint64_t gpu_timestamp() const override; |
| |
| private: |
| /// @brief Requests the next perf sample |
| /// @return The sample GPU timestamp |
| uint64_t gpu_next(); |
| |
| /// @param data Buffer of bytes to parse |
| /// @param byte_count Number of bytes to parse |
| /// @return A list of perf records parsed from raw data passed as input |
| std::vector<PerfRecord> parse_perf_records(const std::vector<uint8_t> &data, size_t byte_count); |
| |
| /// @brief Reads data from the GPU metric set |
| void read_data_from_metric_set(); |
| |
| /// Sampling period in nanoseconds requested by the datasource |
| uint64_t sampling_period_ns = 0; |
| |
| /// Last upper 32bits of the GPU timestamp in the parsed reports |
| uint64_t gpu_timestamp_udw = 0; |
| |
| /// Keep track of the timestamp of the last sample generated (upper & lower |
| /// 32bits) |
| uint64_t last_gpu_timestamp = 0; |
| |
| /// Data buffer used to store data read from the metric set |
| std::vector<uint8_t> metric_buffer = std::vector<uint8_t>(1024, 0); |
| /// Number of bytes read so far still un-parsed. |
| /// Reset once bytes from the metric buffer are parsed to perf records |
| size_t total_bytes_read = 0; |
| |
| /// List of OA perf records read so far |
| std::vector<PerfRecord> records; |
| |
| std::unique_ptr<IntelPerf> perf; |
| |
| // Gpu clock ID used to correlate GPU/CPU timestamps |
| uint32_t clock_id = 0; |
| |
| // Selected query |
| intel_perf_query_info *selected_query = nullptr; |
| }; |
| |
| } // namespace pps |