blob: c6642f474578cf2659569f0ccc1879a6b66a18b3 [file] [log] [blame]
/*
* Copyright (C) 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/protozero/filtering/string_filter.h"
#include <cstring>
#include <regex>
#include <string_view>
#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/string_view.h"
#include "perfetto/public/compiler.h"
namespace protozero {
namespace {
// Regex match results over a mutable character buffer, so that matched ranges
// can be overwritten in place.
using Matches = std::match_results<char*>;

// Text written over each redacted capture group (truncated when the group is
// shorter than this string).
constexpr std::string_view kRedacted{"P60REDACTED"};

// Filler used for the part of a capture group which extends past |kRedacted|.
constexpr char kRedactedDash{'-'};
// Returns a pointer to the first character after the tgid pipe character in
// the atrace string given by [ptr, end). Returns null if no such character
// exists (including when the range is empty or too short to contain one).
//
// Examples:
// E|1024 -> nullptr
// foobarbaz -> nullptr
// B|1024|x -> pointer to x
const char* FindAtracePayloadPtr(const char* ptr, const char* end) {
  // Don't even bother checking any strings which are so short that they could
  // not contain a post-tgid section. This filters out strings like "E|" which
  // are emitted by Bionic.
  //
  // The length is checked *before* computing |ptr + 2|: forming a pointer
  // beyond one-past-the-end of the buffer is undefined behaviour, even if it
  // is never dereferenced.
  static constexpr size_t kEarliestSecondPipeIndex = 2;
  if (end <= ptr ||
      static_cast<size_t>(end - ptr) <= kEarliestSecondPipeIndex) {
    return nullptr;
  }

  // Also filter out any other strings starting with "E" as they never contain
  // anything past the tgid: this removes >half of the strings for ~zero cost.
  if (*ptr == 'E') {
    return nullptr;
  }

  // We skipped past the first '|' character by starting at the character at
  // index 2. Just find the next pipe character (i.e. the one after tgid) using
  // memchr.
  const char* search_start = ptr + kEarliestSecondPipeIndex;
  const char* pipe = static_cast<const char*>(
      memchr(search_start, '|', static_cast<size_t>(end - search_start)));
  return pipe ? pipe + 1 : nullptr;
}
// Returns true if the characters in [ptr, end) begin with |starts_with|.
// An empty |starts_with| matches any valid (possibly empty) range.
bool StartsWith(const char* ptr,
                const char* end,
                const std::string& starts_with) {
  // Verify that the atrace string has enough characters to match against all
  // the characters in the "starts with" string. Lengths are compared rather
  // than computing |ptr + starts_with.size()|, which would form an
  // out-of-bounds pointer (undefined behaviour) whenever the prefix is longer
  // than the remaining input.
  const size_t prefix_len = starts_with.size();
  if (end < ptr || static_cast<size_t>(end - ptr) < prefix_len) {
    return false;
  }
  // Enough characters are available: memcmp to check if they all match.
  return prefix_len == 0 || memcmp(ptr, starts_with.data(), prefix_len) == 0;
}
// Overwrites, in place, the characters of every capture group in |matches|.
// Each group is filled with as much of |kRedacted| as fits, and any remaining
// characters of the group are set to |kRedactedDash|. The length of the
// underlying string is never changed.
void RedactMatches(const Matches& matches) {
  const size_t group_count = matches.size();
  // Start at 1: entry 0 is skipped and only the groups after it are redacted.
  for (size_t group = 1; group < group_count; ++group) {
    const auto& sub = matches[group];
    PERFETTO_CHECK(sub.second >= sub.first);

    char* const begin = sub.first;
    const size_t span = static_cast<size_t>(sub.second - begin);

    // A group shorter than |kRedacted| just receives a truncated copy of it.
    const size_t prefix = span < kRedacted.size() ? span : kRedacted.size();
    memcpy(begin, kRedacted.data(), prefix);

    // Whatever is left of the group after |kRedacted| is padded with dashes.
    memset(begin + prefix, kRedactedDash, span - prefix);
  }
}
} // namespace
// Appends a new rule to |rules_|. |pattern_str| is compiled once, here, as an
// ECMAScript regex with the |optimize| flag set; |atrace_payload_starts_with|
// is stored alongside it and is consulted by the atrace policies when
// filtering.
void StringFilter::AddRule(Policy policy,
                           std::string_view pattern_str,
                           std::string atrace_payload_starts_with) {
  const auto regex_flags =
      std::regex::ECMAScript | std::regex_constants::optimize;
  std::regex compiled(pattern_str.data(), pattern_str.size(), regex_flags);
  rules_.emplace_back(StringFilter::Rule{
      policy, std::move(compiled), std::move(atrace_payload_starts_with)});
}
// Runs the rules in |rules_|, in order, against the string [ptr, ptr + len).
// The first rule which matches decides the outcome:
//  - a "redact groups" policy overwrites the rule's capture groups in place
//    and returns true;
//  - a "break" policy stops processing and returns false, leaving the string
//    untouched.
// Returns false if no rule matches.
bool StringFilter::MaybeFilterInternal(char* ptr, size_t len) const {
  char* const end = ptr + len;
  std::match_results<char*> groups;

  // The atrace payload is located lazily, at most once per string, the first
  // time an atrace rule is encountered.
  bool payload_looked_up = false;
  const char* payload = nullptr;

  for (const Rule& rule : rules_) {
    switch (rule.policy) {
      case Policy::kMatchRedactGroups:
      case Policy::kMatchBreak:
        if (!std::regex_match(ptr, end, groups, rule.pattern)) {
          continue;  // No match: move on to the next rule.
        }
        if (rule.policy == Policy::kMatchBreak) {
          return false;
        }
        RedactMatches(groups);
        return true;

      case Policy::kAtraceMatchRedactGroups:
      case Policy::kAtraceMatchBreak:
        if (!payload_looked_up) {
          payload = FindAtracePayloadPtr(ptr, end);
          payload_looked_up = true;
        }
        // Cheap checks first: the (comparatively expensive) regex only runs
        // when the string has a payload which begins with the rule's prefix.
        if (payload == nullptr) {
          continue;
        }
        if (!StartsWith(payload, end, rule.atrace_payload_starts_with)) {
          continue;
        }
        if (!std::regex_match(ptr, end, groups, rule.pattern)) {
          continue;
        }
        if (rule.policy == Policy::kAtraceMatchBreak) {
          return false;
        }
        RedactMatches(groups);
        return true;
    }
  }
  return false;
}
} // namespace protozero