blob: deb3b7febcc48adaa2504727b6a86b5592fa6ea6 [file] [log] [blame] [edit]
/* MIT License
*
* Copyright (c) 2024 Brad House
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* SPDX-License-Identifier: MIT
*/
/* IMPLEMENTATION NOTES
* ====================
*
* With very little effort we should be able to determine fairly proper timeouts
* we can use based on prior query history. We track in order to be able to
* auto-scale when network conditions change (e.g. maybe there is a provider
* failover and timings change due to that). Apple appears to do this within
* their system resolver in MacOS. Obviously we should have a minimum, maximum,
* and initial value to make sure the algorithm doesn't somehow go off the
* rails.
*
* Values:
* - Minimum Timeout: 250ms (approximate RTT half-way around the globe)
* - Maximum Timeout: 5000ms (Recommended timeout in RFC 1123), can be reduced
* by ARES_OPT_MAXTIMEOUTMS, but otherwise the bound specified by the option
* caps the retry timeout.
* - Initial Timeout: User-specified via configuration or ARES_OPT_TIMEOUTMS
* - Average latency multiplier: 5x (a local DNS server returning a cached value
* will be quicker than if it needs to recurse so we need to account for this)
* - Minimum Count for Average: 3. This is the minimum number of queries we
* need to form an average for the bucket.
*
* Per-server buckets for tracking latency over time (these are ephemeral
* meaning they don't persist once a channel is destroyed). We record both the
* current timespan for the bucket and the immediately preceding timespan so
* that after a roll-over we still have recent metrics for calculations:
* - 1 minute
* - 15 minutes
* - 1 hr
* - 1 day
* - since inception
*
* Each bucket would contain:
* - timestamp (divided by interval)
* - minimum latency
* - maximum latency
* - total time
* - count
* NOTE: average latency is (total time / count), we will calculate this
* dynamically when needed
*
* Basic algorithm for calculating timeout to use would be:
* - Scan from most recent bucket to least recent
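* - Check timestamp of bucket, if it doesn't match current time, fall back to
* the previous timespan recorded for that bucket
* - Check count of bucket, if it's not at least the "Minimum Count for
* Average", likewise fall back to the previous timespan for that bucket
* - If the previous timespan is needed but is not the immediately preceding
* one, or it also lacks the minimum count, continue to next bucket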
* - If we reached the end with no bucket match, use "Initial Timeout"
* - If bucket is selected, take ("total time" / count) as Average latency,
* multiply by "Average Latency Multiplier", bound by "Minimum Timeout" and
* "Maximum Timeout"
* NOTE: The timeout calculated may not be the timeout used. If we are retrying
* the query on the same server another time, then it will use a larger value
*
* On each query reply where the response is legitimate (proper response or
* NXDOMAIN) and not something like a server error:
* - Cycle through each bucket in order
* - Check timestamp of bucket against current timestamp, if out of date
* overwrite previous entry with values, clear current values
* - Compare current minimum and maximum recorded latency against query time and
* adjust if necessary
* - Increment "count" by 1 and "total time" by the query time
*
* Other Notes:
* - This is always-on, the only user-configurable value is the initial
* timeout which simply re-uses the current option.
* - Minimum and Maximum latencies for a bucket are currently unused but are
* there in case we find a need for them in the future.
*/
#include "ares_private.h"
/*! Minimum timeout value, in milliseconds. Chosen due to it being
 *  approximately RTT half-way around the world. Calculated timeouts below
 *  this value are raised to it. */
#define MIN_TIMEOUT_MS 250
/*! Multiplier to apply to average latency to come up with an initial timeout.
 *  A server answering from cache is quicker than one that has to recurse, so
 *  the plain average would be too tight. */
#define AVG_TIMEOUT_MULTIPLIER 5
/*! Upper timeout bound, in milliseconds, only used if channel->maxtimeout is
 *  not set (zero) */
#define MAX_TIMEOUT_MS 5000
/*! Minimum queries required in a bucket timespan to form an average */
#define MIN_COUNT_FOR_AVERAGE 3
/*! Compute the timestamp that identifies a timespan of a metrics bucket.
 *
 *  For the interval based buckets the timestamp is now->sec divided by the
 *  length of the bucket's interval in seconds, so all points in time falling
 *  into the same interval produce the same value.
 *
 *  \param[in] bucket       Bucket to compute the timestamp for.
 *  \param[in] now          Current time.
 *  \param[in] is_previous  ARES_TRUE to obtain the timestamp of the interval
 *                          immediately preceding the current one, ARES_FALSE
 *                          to obtain the timestamp of the current interval.
 *  \return Timestamp for the requested timespan.  The since-inception bucket
 *          always yields 1 for the current and 0 for the previous timespan,
 *          and ARES_METRIC_COUNT (not a real bucket) yields 0.
 */
static time_t ares_metric_timestamp(ares_server_bucket_t  bucket,
                                    const ares_timeval_t *now,
                                    ares_bool_t           is_previous)
{
  /* The default value only exists to silence a bogus MSVC warning */
  time_t interval = 1;

  switch (bucket) {
    case ARES_METRIC_INCEPTION:
      /* A single never-ending timespan, no interval math involved */
      if (is_previous) {
        return 0;
      }
      return 1;
    case ARES_METRIC_COUNT:
      return 0; /* Invalid! */
    case ARES_METRIC_1MINUTE:
      interval = 60;
      break;
    case ARES_METRIC_15MINUTES:
      interval = 15 * 60;
      break;
    case ARES_METRIC_1HOUR:
      interval = 60 * 60;
      break;
    case ARES_METRIC_1DAY:
      interval = 24 * 60 * 60;
      break;
  }

  if (!is_previous) {
    return (time_t)(now->sec / interval);
  }

  /* Less than (or exactly) one interval on the clock, so the preceding
   * interval is reported as 0 rather than stepping back past zero */
  if (now->sec <= interval) {
    return 0;
  }

  return (time_t)((now->sec - interval) / interval);
}
/*! Record the latency of an answered query in every metrics bucket of the
 *  server.
 *
 *  Nothing is recorded unless status is ARES_SUCCESS, a server is provided,
 *  and the response code is either NOERROR or NXDOMAIN.
 *
 *  \param[in]     query   Query that was answered; query->ts is used as the
 *                         start of the measured timespan.
 *  \param[in,out] server  Server whose metrics are updated.  May be NULL, in
 *                         which case the call is a no-op.
 *  \param[in]     status  Result of the query.
 *  \param[in]     dnsrec  Response record the rcode is read from.
 */
void ares_metrics_record(const ares_query_t *query, ares_server_t *server,
                         ares_status_t status, const ares_dns_record_t *dnsrec)
{
  ares_timeval_t       now;
  ares_timeval_t       elapsed;
  unsigned int         latency_ms;
  ares_dns_rcode_t     rcode;
  ares_server_bucket_t b;

  /* Only successful queries on a known server are of interest */
  if (status != ARES_SUCCESS || server == NULL) {
    return;
  }

  ares_tvnow(&now);

  /* Anything other than a proper answer or NXDOMAIN is not a legitimate
   * sample */
  rcode = ares_dns_record_get_rcode(dnsrec);
  if (!(rcode == ARES_RCODE_NOERROR || rcode == ARES_RCODE_NXDOMAIN)) {
    return;
  }

  /* Convert the difference between query->ts and now into milliseconds */
  ares_timeval_diff(&elapsed, &query->ts, &now);
  latency_ms = (unsigned int)((elapsed.sec * 1000) + (elapsed.usec / 1000));

  /* Never record 0: a minimum of 0 means "unset" below */
  if (latency_ms == 0) {
    latency_ms = 1;
  }

  for (b = 0; b < ARES_METRIC_COUNT; b++) {
    time_t bucket_ts = ares_metric_timestamp(b, &now, ARES_FALSE);

    if (server->metrics[b].ts != bucket_ts) {
      /* Timespan rolled over: preserve what we had as the previous one */
      server->metrics[b].prev_total_count = server->metrics[b].total_count;
      server->metrics[b].prev_total_ms    = server->metrics[b].total_ms;
      server->metrics[b].prev_ts          = server->metrics[b].ts;

      /* ... and start the current timespan from scratch */
      server->metrics[b].total_count    = 0;
      server->metrics[b].total_ms       = 0;
      server->metrics[b].latency_max_ms = 0;
      server->metrics[b].latency_min_ms = 0;
      server->metrics[b].ts             = bucket_ts;
    }

    if (latency_ms > server->metrics[b].latency_max_ms) {
      server->metrics[b].latency_max_ms = latency_ms;
    }

    if (server->metrics[b].latency_min_ms == 0 ||
        latency_ms < server->metrics[b].latency_min_ms) {
      server->metrics[b].latency_min_ms = latency_ms;
    }

    server->metrics[b].total_ms += (ares_uint64_t)latency_ms;
    server->metrics[b].total_count++;
  }
}
/*! Determine the timeout, in milliseconds, to use for a server based on its
 *  recorded latency metrics.
 *
 *  Buckets are scanned in order.  The first bucket whose current timespan (or,
 *  failing that, immediately preceding timespan) holds at least
 *  MIN_COUNT_FOR_AVERAGE samples provides the average latency, which is then
 *  multiplied by AVG_TIMEOUT_MULTIPLIER.  If no bucket qualifies,
 *  channel->timeout is used.  The result is never below MIN_TIMEOUT_MS and
 *  never above channel->maxtimeout (or MAX_TIMEOUT_MS if that is zero).
 *
 *  \param[in] server  Server to calculate the timeout for.
 *  \param[in] now     Current time.
 *  \return timeout in milliseconds.
 */
size_t ares_metrics_server_timeout(const ares_server_t  *server,
                                   const ares_timeval_t *now)
{
  const ares_channel_t *channel = server->channel;
  ares_server_bucket_t  bucket;
  size_t                result = 0;
  size_t                ceiling;

  for (bucket = 0; bucket < ARES_METRIC_COUNT; bucket++) {
    if (server->metrics[bucket].ts ==
          ares_metric_timestamp(bucket, now, ARES_FALSE) &&
        server->metrics[bucket].total_count >= MIN_COUNT_FOR_AVERAGE) {
      /* Current timespan is valid and has enough samples: use its average */
      result = (size_t)(server->metrics[bucket].total_ms /
                        server->metrics[bucket].total_count);
    } else if (server->metrics[bucket].prev_ts ==
                 ares_metric_timestamp(bucket, now, ARES_TRUE) &&
               server->metrics[bucket].prev_total_count >=
                 MIN_COUNT_FOR_AVERAGE) {
      /* Current timespan is stale or too sparse, but the one immediately
       * before it is usable: use that average instead */
      result = (size_t)(server->metrics[bucket].prev_total_ms /
                        server->metrics[bucket].prev_total_count);
    } else {
      /* Neither timespan of this bucket is usable, try the next bucket */
      continue;
    }

    /* Scale the average up to arrive at the timeout value */
    result *= AVG_TIMEOUT_MULTIPLIER;
    break;
  }

  /* No usable history for this server, fall back to the initial default
   * timeout */
  if (result == 0) {
    result = channel->timeout;
  }

  /* Enforce the lower bound */
  if (result < MIN_TIMEOUT_MS) {
    result = MIN_TIMEOUT_MS;
  }

  /* Enforce the upper bound, preferring the channel's setting when present */
  if (channel->maxtimeout) {
    ceiling = channel->maxtimeout;
  } else {
    ceiling = MAX_TIMEOUT_MS;
  }
  if (result > ceiling) {
    result = ceiling;
  }

  return result;
}