/* MIT License
 *
 * Copyright (c) 2024 Brad House
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */


/* IMPLEMENTATION NOTES
 * ====================
 *
 * With very little effort we should be able to determine reasonable timeouts
 * to use based on prior query history. We track history so that we can
 * auto-scale when network conditions change (e.g. a provider failover that
 * causes timings to change). Apple appears to do this within their system
 * resolver on macOS. We enforce a minimum, maximum, and initial value to
 * make sure the algorithm doesn't somehow go off the rails.
 *
 * Values:
 * - Minimum Timeout: 250ms (approximate RTT half-way around the globe)
 * - Maximum Timeout: 5000ms (recommended timeout in RFC 1123); can be
 *   overridden by ARES_OPT_MAXTIMEOUTMS, in which case the option's value
 *   caps the retry timeout instead
 * - Initial Timeout: User-specified via configuration or ARES_OPT_TIMEOUTMS
 * - Average latency multiplier: 5x (a local DNS server returning a cached
 *   value will be quicker than one that needs to recurse, so we need to
 *   account for this)
 * - Minimum Count for Average: 3. This is the minimum number of queries we
 *   need to form an average for the bucket.
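 *
 * Worked example of how these values combine: a server averaging 20ms per
 * query yields 20ms * 5 = 100ms, which is raised to the 250ms minimum; a
 * server averaging 1200ms yields 1200ms * 5 = 6000ms, which is capped at
 * the 5000ms maximum (or at ARES_OPT_MAXTIMEOUTMS if set).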
 *
 * Per-server buckets track latency over time (these are ephemeral, meaning
 * they don't persist once a channel is destroyed). We record both the
 * current timespan for the bucket and the immediately preceding timespan so
 * that, across roll-overs, we can still maintain recent metrics for
 * calculations:
 * - 1 minute
 * - 15 minutes
 * - 1 hour
 * - 1 day
 * - since inception
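 *
 * For example, if now->sec were 1700000000, the 1-minute bucket's current
 * time period would be 1700000000 / 60 = 28333333 and its previous time
 * period 28333332; the "since inception" bucket always uses the constant
 * period 1, so it never rolls over.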
 *
 * Each bucket contains (sketched below):
 * - timestamp (divided by interval)
 * - minimum latency
 * - maximum latency
 * - total time
 * - count
 * NOTE: average latency is (total time / count); we calculate this
 * dynamically when needed
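 *
 * As a sketch, each bucket's storage looks roughly like this (the real
 * definition lives in ares_private.h; the field names match the code below,
 * but the exact types are inferred and may differ):
 *
 *   typedef struct {
 *     time_t        ts;               // current period: timestamp / interval
 *     unsigned int  latency_min_ms;   // minimum latency this period
 *     unsigned int  latency_max_ms;   // maximum latency this period
 *     ares_uint64_t total_ms;         // sum of query times this period
 *     size_t        total_count;      // number of queries this period
 *     time_t        prev_ts;          // preceding period's timestamp
 *     ares_uint64_t prev_total_ms;    // preceding period's total time
 *     size_t        prev_total_count; // preceding period's query count
 *   } ares_server_metrics_t;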
 *
 * The basic algorithm for calculating the timeout to use is:
 * - Scan from the most recent bucket to the least recent
 * - Check the timestamp of the bucket; if it doesn't match the current time
 *   period, or its count is below the "Minimum Count for Average", fall back
 *   to the bucket's previous timespan
 * - If the previous timespan is also out of date or below the minimum count,
 *   continue to the next bucket
 * - If we reach the end with no bucket match, use the "Initial Timeout"
 * - If a bucket is selected, take ("total time" / count) as the average
 *   latency, multiply by the "Average Latency Multiplier", then bound by the
 *   "Minimum Timeout" and "Maximum Timeout"
 * NOTE: The timeout calculated may not be the timeout used. If the query is
 * being retried against the same server, a larger value will be used.
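 *
 * Worked example: suppose the 1-minute bucket's current timespan has only 2
 * samples, but its previous timespan recorded 3 queries totaling 90ms. The
 * previous timespan qualifies, so the average is 90 / 3 = 30ms, multiplied
 * by 5 to get 150ms, then raised to the 250ms minimum timeout.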
 *
 * On each query reply where the response is legitimate (a proper response or
 * NXDOMAIN) and not something like a server error:
 * - Cycle through each bucket in order
 * - Check the timestamp of the bucket against the current timestamp; if out
 *   of date, overwrite the previous entry with the current values and clear
 *   the current values
 * - Compare the current minimum and maximum recorded latency against the
 *   query time and adjust if necessary
 * - Increment "count" by 1 and "total time" by the query time
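 *
 * For example, a 23ms reply that arrives in a new 1-minute period first
 * rolls the bucket over (the current totals move to the "previous" slots and
 * the current values are cleared), then records min = max = 23ms, count = 1,
 * total time = 23ms.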
 *
 * Other Notes:
 * - This is always-on; the only user-configurable value is the initial
 *   timeout, which simply re-uses the existing timeout option.
 * - Minimum and Maximum latencies for a bucket are currently unused but are
 *   there in case we find a need for them in the future.
 */

#include "ares_private.h"

/*! Minimum timeout value. Chosen because it approximates the RTT half-way
 *  around the globe */
#define MIN_TIMEOUT_MS 250

/*! Multiplier to apply to the average latency to derive the timeout */
#define AVG_TIMEOUT_MULTIPLIER 5

/*! Upper timeout bound, only used if channel->maxtimeout is not set */
#define MAX_TIMEOUT_MS 5000

/*! Minimum queries required to form an average */
#define MIN_COUNT_FOR_AVERAGE 3
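
/*! Calculate the time period identifier for a bucket: the current timestamp
 *  (now->sec) divided by the bucket's interval, or the immediately preceding
 *  period when is_previous is set. The inception bucket returns the
 *  constants 1 (current) and 0 (previous) so its current period always
 *  matches. */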
static time_t ares_metric_timestamp(ares_server_bucket_t  bucket,
                                    const ares_timeval_t *now,
                                    ares_bool_t           is_previous)
{
  time_t divisor = 1; /* Silence bogus MSVC warning by setting default value */

  switch (bucket) {
    case ARES_METRIC_1MINUTE:
      divisor = 60;
      break;
    case ARES_METRIC_15MINUTES:
      divisor = 15 * 60;
      break;
    case ARES_METRIC_1HOUR:
      divisor = 60 * 60;
      break;
    case ARES_METRIC_1DAY:
      divisor = 24 * 60 * 60;
      break;
    case ARES_METRIC_INCEPTION:
      return is_previous ? 0 : 1;
    case ARES_METRIC_COUNT:
      return 0; /* Invalid! */
  }

  if (is_previous) {
    if (divisor >= now->sec) {
      return 0;
    }
    return (time_t)((now->sec - divisor) / divisor);
  }

  return (time_t)(now->sec / divisor);
}
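
/*! Record the latency of a successful query (NOERROR or NXDOMAIN) into each
 *  of the server's metric buckets, rolling a bucket's current time period
 *  into its previous period when the period has changed. */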
void ares_metrics_record(const ares_query_t *query, ares_server_t *server,
                         ares_status_t status, const ares_dns_record_t *dnsrec)
{
  ares_timeval_t       now;
  ares_timeval_t       tvdiff;
  unsigned int         query_ms;
  ares_dns_rcode_t     rcode;
  ares_server_bucket_t i;

  if (status != ARES_SUCCESS) {
    return;
  }

  if (server == NULL) {
    return;
  }

  ares_tvnow(&now);

  rcode = ares_dns_record_get_rcode(dnsrec);
  if (rcode != ARES_RCODE_NOERROR && rcode != ARES_RCODE_NXDOMAIN) {
    return;
  }

  ares_timeval_diff(&tvdiff, &query->ts, &now);
  query_ms = (unsigned int)((tvdiff.sec * 1000) + (tvdiff.usec / 1000));
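  /* A response faster than 1ms still counts; floor it to 1ms */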
  if (query_ms == 0) {
    query_ms = 1;
  }

  /* Place in each bucket */
  for (i = 0; i < ARES_METRIC_COUNT; i++) {
    time_t ts = ares_metric_timestamp(i, &now, ARES_FALSE);

    /* Copy metrics to prev and clear */
    if (ts != server->metrics[i].ts) {
      server->metrics[i].prev_ts          = server->metrics[i].ts;
      server->metrics[i].prev_total_ms    = server->metrics[i].total_ms;
      server->metrics[i].prev_total_count = server->metrics[i].total_count;
      server->metrics[i].ts               = ts;
      server->metrics[i].latency_min_ms   = 0;
      server->metrics[i].latency_max_ms   = 0;
      server->metrics[i].total_ms         = 0;
      server->metrics[i].total_count      = 0;
    }

    if (server->metrics[i].latency_min_ms == 0 ||
        server->metrics[i].latency_min_ms > query_ms) {
      server->metrics[i].latency_min_ms = query_ms;
    }

    if (query_ms > server->metrics[i].latency_max_ms) {
      server->metrics[i].latency_max_ms = query_ms;
    }

    server->metrics[i].total_count++;
    server->metrics[i].total_ms += (ares_uint64_t)query_ms;
  }
}
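
/*! Calculate the timeout to use for the next query to this server based on
 *  recorded latency history, falling back to the channel's configured
 *  initial timeout when no bucket has enough samples, and bounding the
 *  result by the minimum and maximum timeouts. */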
size_t ares_metrics_server_timeout(const ares_server_t *server,
                                   const ares_timeval_t *now)
{
  const ares_channel_t *channel = server->channel;
  ares_server_bucket_t  i;
  size_t                timeout_ms = 0;
  size_t                max_timeout_ms;

  for (i = 0; i < ARES_METRIC_COUNT; i++) {
    time_t ts = ares_metric_timestamp(i, now, ARES_FALSE);

    /* This ts has been invalidated; see if we should use the previous
     * time period */
    if (ts != server->metrics[i].ts ||
        server->metrics[i].total_count < MIN_COUNT_FOR_AVERAGE) {
      time_t prev_ts = ares_metric_timestamp(i, now, ARES_TRUE);
      if (prev_ts != server->metrics[i].prev_ts ||
          server->metrics[i].prev_total_count < MIN_COUNT_FOR_AVERAGE) {
        /* Move onto next bucket */
        continue;
      }
      /* Calculate average time for previous bucket */
      timeout_ms = (size_t)(server->metrics[i].prev_total_ms /
                            server->metrics[i].prev_total_count);
    } else {
      /* Calculate average time for current bucket */
      timeout_ms =
        (size_t)(server->metrics[i].total_ms / server->metrics[i].total_count);
    }

    /* Multiply average by constant to get timeout value */
    timeout_ms *= AVG_TIMEOUT_MULTIPLIER;
    break;
  }

  /* If we get here with timeout_ms still 0, no bucket had enough history,
   * meaning it's effectively the first query for the server, so we just use
   * the initial default timeout */
  if (timeout_ms == 0) {
    timeout_ms = channel->timeout;
  }

  /* Don't go below the lower bound */
  if (timeout_ms < MIN_TIMEOUT_MS) {
    timeout_ms = MIN_TIMEOUT_MS;
  }

  /* Don't go above the upper bound */
  max_timeout_ms = channel->maxtimeout ? channel->maxtimeout : MAX_TIMEOUT_MS;
  if (timeout_ms > max_timeout_ms) {
    timeout_ms = max_timeout_ms;
  }

  return timeout_ms;
}
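
/* Usage sketch: ares_metrics_server_timeout() provides the base timeout for
 * a single query attempt. A caller might scale it for retries against the
 * same server, per the NOTE in the implementation notes above (the doubling
 * policy and its cap below are illustrative assumptions, not necessarily the
 * library's actual retry logic):
 *
 *   ares_timeval_t now;
 *   size_t         timeout_ms;
 *   size_t         tries = 2;  // hypothetical: third attempt on this server
 *
 *   ares_tvnow(&now);
 *   timeout_ms = ares_metrics_server_timeout(server, &now);
 *   timeout_ms <<= (tries > 3) ? 3 : tries;  // double per retry, cap at 8x
 */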