| package memberlist |
| |
| import ( |
| "io" |
| "log" |
| "os" |
| "time" |
| ) |
| |
| type Config struct { |
| // The name of this node. This must be unique in the cluster. |
| Name string |
| |
| // Transport is a hook for providing custom code to communicate with |
| // other nodes. If this is left nil, then memberlist will by default |
| // make a NetTransport using BindAddr and BindPort from this structure. |
| Transport Transport |
| |
| // Configuration related to what address to bind to and ports to |
| // listen on. The port is used for both UDP and TCP gossip. It is |
| // assumed other nodes are running on this port, but they do not need |
| // to. |
| BindAddr string |
| BindPort int |
| |
| // Configuration related to what address to advertise to other |
| // cluster members. Used for nat traversal. |
| AdvertiseAddr string |
| AdvertisePort int |
| |
| // ProtocolVersion is the configured protocol version that we |
| // will _speak_. This must be between ProtocolVersionMin and |
| // ProtocolVersionMax. |
| ProtocolVersion uint8 |
| |
| // TCPTimeout is the timeout for establishing a stream connection with |
| // a remote node for a full state sync, and for stream read and write |
| // operations. This is a legacy name for backwards compatibility, but |
| // should really be called StreamTimeout now that we have generalized |
| // the transport. |
| TCPTimeout time.Duration |
| |
| // IndirectChecks is the number of nodes that will be asked to perform |
| // an indirect probe of a node in the case a direct probe fails. Memberlist |
| // waits for an ack from any single indirect node, so increasing this |
| // number will increase the likelihood that an indirect probe will succeed |
| // at the expense of bandwidth. |
| IndirectChecks int |
| |
| // RetransmitMult is the multiplier for the number of retransmissions |
| // that are attempted for messages broadcasted over gossip. The actual |
| // count of retransmissions is calculated using the formula: |
| // |
| // Retransmits = RetransmitMult * log(N+1) |
| // |
| // This allows the retransmits to scale properly with cluster size. The |
| // higher the multiplier, the more likely a failed broadcast is to converge |
| // at the expense of increased bandwidth. |
| RetransmitMult int |
| |
| // SuspicionMult is the multiplier for determining the time an |
| // inaccessible node is considered suspect before declaring it dead. |
| // The actual timeout is calculated using the formula: |
| // |
| // SuspicionTimeout = SuspicionMult * log(N+1) * ProbeInterval |
| // |
| // This allows the timeout to scale properly with expected propagation |
| // delay with a larger cluster size. The higher the multiplier, the longer |
| // an inaccessible node is considered part of the cluster before declaring |
| // it dead, giving that suspect node more time to refute if it is indeed |
| // still alive. |
| SuspicionMult int |
| |
| // SuspicionMaxTimeoutMult is the multiplier applied to the |
| // SuspicionTimeout used as an upper bound on detection time. This max |
| // timeout is calculated using the formula: |
| // |
| // SuspicionMaxTimeout = SuspicionMaxTimeoutMult * SuspicionTimeout |
| // |
| // If everything is working properly, confirmations from other nodes will |
| // accelerate suspicion timers in a manner which will cause the timeout |
| // to reach the base SuspicionTimeout before that elapses, so this value |
| // will typically only come into play if a node is experiencing issues |
| // communicating with other nodes. It should be set to a something fairly |
| // large so that a node having problems will have a lot of chances to |
| // recover before falsely declaring other nodes as failed, but short |
| // enough for a legitimately isolated node to still make progress marking |
| // nodes failed in a reasonable amount of time. |
| SuspicionMaxTimeoutMult int |
| |
| // PushPullInterval is the interval between complete state syncs. |
| // Complete state syncs are done with a single node over TCP and are |
| // quite expensive relative to standard gossiped messages. Setting this |
| // to zero will disable state push/pull syncs completely. |
| // |
| // Setting this interval lower (more frequent) will increase convergence |
| // speeds across larger clusters at the expense of increased bandwidth |
| // usage. |
| PushPullInterval time.Duration |
| |
| // ProbeInterval and ProbeTimeout are used to configure probing |
| // behavior for memberlist. |
| // |
| // ProbeInterval is the interval between random node probes. Setting |
| // this lower (more frequent) will cause the memberlist cluster to detect |
| // failed nodes more quickly at the expense of increased bandwidth usage. |
| // |
| // ProbeTimeout is the timeout to wait for an ack from a probed node |
| // before assuming it is unhealthy. This should be set to 99-percentile |
| // of RTT (round-trip time) on your network. |
| ProbeInterval time.Duration |
| ProbeTimeout time.Duration |
| |
| // DisableTcpPings will turn off the fallback TCP pings that are attempted |
| // if the direct UDP ping fails. These get pipelined along with the |
| // indirect UDP pings. |
| DisableTcpPings bool |
| |
| // AwarenessMaxMultiplier will increase the probe interval if the node |
| // becomes aware that it might be degraded and not meeting the soft real |
| // time requirements to reliably probe other nodes. |
| AwarenessMaxMultiplier int |
| |
| // GossipInterval and GossipNodes are used to configure the gossip |
| // behavior of memberlist. |
| // |
| // GossipInterval is the interval between sending messages that need |
| // to be gossiped that haven't been able to piggyback on probing messages. |
| // If this is set to zero, non-piggyback gossip is disabled. By lowering |
| // this value (more frequent) gossip messages are propagated across |
| // the cluster more quickly at the expense of increased bandwidth. |
| // |
| // GossipNodes is the number of random nodes to send gossip messages to |
| // per GossipInterval. Increasing this number causes the gossip messages |
| // to propagate across the cluster more quickly at the expense of |
| // increased bandwidth. |
| // |
| // GossipToTheDeadTime is the interval after which a node has died that |
| // we will still try to gossip to it. This gives it a chance to refute. |
| GossipInterval time.Duration |
| GossipNodes int |
| GossipToTheDeadTime time.Duration |
| |
| // GossipVerifyIncoming controls whether to enforce encryption for incoming |
| // gossip. It is used for upshifting from unencrypted to encrypted gossip on |
| // a running cluster. |
| GossipVerifyIncoming bool |
| |
| // GossipVerifyOutgoing controls whether to enforce encryption for outgoing |
| // gossip. It is used for upshifting from unencrypted to encrypted gossip on |
| // a running cluster. |
| GossipVerifyOutgoing bool |
| |
| // EnableCompression is used to control message compression. This can |
| // be used to reduce bandwidth usage at the cost of slightly more CPU |
| // utilization. This is only available starting at protocol version 1. |
| EnableCompression bool |
| |
| // SecretKey is used to initialize the primary encryption key in a keyring. |
| // The primary encryption key is the only key used to encrypt messages and |
| // the first key used while attempting to decrypt messages. Providing a |
| // value for this primary key will enable message-level encryption and |
| // verification, and automatically install the key onto the keyring. |
| // The value should be either 16, 24, or 32 bytes to select AES-128, |
| // AES-192, or AES-256. |
| SecretKey []byte |
| |
| // The keyring holds all of the encryption keys used internally. It is |
| // automatically initialized using the SecretKey and SecretKeys values. |
| Keyring *Keyring |
| |
| // Delegate and Events are delegates for receiving and providing |
| // data to memberlist via callback mechanisms. For Delegate, see |
| // the Delegate interface. For Events, see the EventDelegate interface. |
| // |
| // The DelegateProtocolMin/Max are used to guarantee protocol-compatibility |
| // for any custom messages that the delegate might do (broadcasts, |
| // local/remote state, etc.). If you don't set these, then the protocol |
| // versions will just be zero, and version compliance won't be done. |
| Delegate Delegate |
| DelegateProtocolVersion uint8 |
| DelegateProtocolMin uint8 |
| DelegateProtocolMax uint8 |
| Events EventDelegate |
| Conflict ConflictDelegate |
| Merge MergeDelegate |
| Ping PingDelegate |
| Alive AliveDelegate |
| |
| // DNSConfigPath points to the system's DNS config file, usually located |
| // at /etc/resolv.conf. It can be overridden via config for easier testing. |
| DNSConfigPath string |
| |
| // LogOutput is the writer where logs should be sent. If this is not |
| // set, logging will go to stderr by default. You cannot specify both LogOutput |
| // and Logger at the same time. |
| LogOutput io.Writer |
| |
| // Logger is a custom logger which you provide. If Logger is set, it will use |
| // this for the internal logger. If Logger is not set, it will fall back to the |
| // behavior for using LogOutput. You cannot specify both LogOutput and Logger |
| // at the same time. |
| Logger *log.Logger |
| |
| // Size of Memberlist's internal channel which handles UDP messages. The |
| // size of this determines the size of the queue which Memberlist will keep |
| // while UDP messages are handled. |
| HandoffQueueDepth int |
| |
| // Maximum number of bytes that memberlist will put in a packet (this |
| // will be for UDP packets by default with a NetTransport). A safe value |
| // for this is typically 1400 bytes (which is the default). However, |
| // depending on your network's MTU (Maximum Transmission Unit) you may |
| // be able to increase this to get more content into each gossip packet. |
| // This is a legacy name for backward compatibility but should really be |
| // called PacketBufferSize now that we have generalized the transport. |
| UDPBufferSize int |
| } |
| |
| // DefaultLANConfig returns a sane set of configurations for Memberlist. |
| // It uses the hostname as the node name, and otherwise sets very conservative |
| // values that are sane for most LAN environments. The default configuration |
| // errs on the side of caution, choosing values that are optimized |
| // for higher convergence at the cost of higher bandwidth usage. Regardless, |
| // these values are a good starting point when getting started with memberlist. |
| func DefaultLANConfig() *Config { |
| hostname, _ := os.Hostname() |
| return &Config{ |
| Name: hostname, |
| BindAddr: "0.0.0.0", |
| BindPort: 7946, |
| AdvertiseAddr: "", |
| AdvertisePort: 7946, |
| ProtocolVersion: ProtocolVersion2Compatible, |
| TCPTimeout: 10 * time.Second, // Timeout after 10 seconds |
| IndirectChecks: 3, // Use 3 nodes for the indirect ping |
| RetransmitMult: 4, // Retransmit a message 4 * log(N+1) nodes |
| SuspicionMult: 4, // Suspect a node for 4 * log(N+1) * Interval |
| SuspicionMaxTimeoutMult: 6, // For 10k nodes this will give a max timeout of 120 seconds |
| PushPullInterval: 30 * time.Second, // Low frequency |
| ProbeTimeout: 500 * time.Millisecond, // Reasonable RTT time for LAN |
| ProbeInterval: 1 * time.Second, // Failure check every second |
| DisableTcpPings: false, // TCP pings are safe, even with mixed versions |
| AwarenessMaxMultiplier: 8, // Probe interval backs off to 8 seconds |
| |
| GossipNodes: 3, // Gossip to 3 nodes |
| GossipInterval: 200 * time.Millisecond, // Gossip more rapidly |
| GossipToTheDeadTime: 30 * time.Second, // Same as push/pull |
| GossipVerifyIncoming: true, |
| GossipVerifyOutgoing: true, |
| |
| EnableCompression: true, // Enable compression by default |
| |
| SecretKey: nil, |
| Keyring: nil, |
| |
| DNSConfigPath: "/etc/resolv.conf", |
| |
| HandoffQueueDepth: 1024, |
| UDPBufferSize: 1400, |
| } |
| } |
| |
| // DefaultWANConfig works like DefaultConfig, however it returns a configuration |
| // that is optimized for most WAN environments. The default configuration is |
| // still very conservative and errs on the side of caution. |
| func DefaultWANConfig() *Config { |
| conf := DefaultLANConfig() |
| conf.TCPTimeout = 30 * time.Second |
| conf.SuspicionMult = 6 |
| conf.PushPullInterval = 60 * time.Second |
| conf.ProbeTimeout = 3 * time.Second |
| conf.ProbeInterval = 5 * time.Second |
| conf.GossipNodes = 4 // Gossip less frequently, but to an additional node |
| conf.GossipInterval = 500 * time.Millisecond |
| conf.GossipToTheDeadTime = 60 * time.Second |
| return conf |
| } |
| |
| // DefaultLocalConfig works like DefaultConfig, however it returns a configuration |
| // that is optimized for a local loopback environments. The default configuration is |
| // still very conservative and errs on the side of caution. |
| func DefaultLocalConfig() *Config { |
| conf := DefaultLANConfig() |
| conf.TCPTimeout = time.Second |
| conf.IndirectChecks = 1 |
| conf.RetransmitMult = 2 |
| conf.SuspicionMult = 3 |
| conf.PushPullInterval = 15 * time.Second |
| conf.ProbeTimeout = 200 * time.Millisecond |
| conf.ProbeInterval = time.Second |
| conf.GossipInterval = 100 * time.Millisecond |
| conf.GossipToTheDeadTime = 15 * time.Second |
| return conf |
| } |
| |
| // Returns whether or not encryption is enabled |
| func (c *Config) EncryptionEnabled() bool { |
| return c.Keyring != nil && len(c.Keyring.GetKeys()) > 0 |
| } |