Initial public commit
Probably good enough for a first experimental release
- Id
- 1c387f161e04be96b2c547bb6fcd8da6a6b1a1a1
- Author
- Caio
- Commit time
- 2021-11-04T15:26:54+01:00
Created .github/workflows/ci.yml
+on: [push, pull_request]
+
+name: Continuous Integration
+
+jobs:
+ check:
+ name: Cargo check
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
+ with:
+ profile: minimal
+ toolchain: stable
+ override: true
+ - uses: actions-rs/cargo@v1
+ with:
+ command: check
+
+ test:
+ name: Testsuite with all features
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
+ with:
+ profile: minimal
+ toolchain: stable
+ override: true
+ - uses: actions-rs/cargo@v1
+ with:
+ command: test
+ args: --all-features
+
+ fmt:
+ name: Rustfmt
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
+ with:
+ profile: minimal
+ toolchain: stable
+ override: true
+ - run: rustup component add rustfmt
+ - uses: actions-rs/cargo@v1
+ with:
+ command: fmt
+ args: --all -- --check
+
+ ensure_no_std_alloc:
+ name: No-std Compatibility
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
+ with:
+ profile: minimal
+ toolchain: nightly
+ override: true
+ - name: build
+ working-directory: ./ensure_no_std_alloc
+ run: cargo +nightly rustc -- -C link-arg=-nostartfiles
+
+ clippy:
+ name: Clippy
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
+ with:
+ profile: minimal
+ toolchain: stable
+ override: true
+ - run: rustup component add clippy
+ - uses: actions-rs/cargo@v1
+ with:
+ command: clippy
+ args: --all-features -- -D warnings
+
+ features:
+ name: Features Powerset
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
+ with:
+ profile: minimal
+ toolchain: stable
+ override: true
+ - name: Install cargo-hack
+ run: cargo install cargo-hack
+ - name: Run checks
+ run: cargo hack check --feature-powerset --no-dev-deps
Created .gitignore
+target
+Cargo.lock
Created Cargo.toml
+[package]
+name = "foca"
+version = "0.1.0"
+authors = ["Caio <contact@caio.co>"]
+edition = "2021"
+license = "MPL-2.0"
+description = "Gossip-based cluster membership discovery (SWIM)"
+keywords = ["swim", "gossip", "service-discovery", "memberlist"]
+categories = ["network-programming", "no-std"]
+repository = "https://github.com/caio/foca"
+homepage = "https://github.com/caio/foca"
+
+[package.metadata.docs.rs]
+all-features = true
+
+[features]
+# Will always be empty
+default = []
+
+# Adds compatibility with some types and traits
+std = ["anyhow/std"]
+# Exposes `BincodeCodec` a lean general-purpose std-only codec
+bincode-codec = ["std", "serde", "bincode", "bytes/std"]
+# Exposes `PostcardCodec`, a no_std-friendly codec
+postcard-codec = ["serde", "postcard"]
+
+# For examples/identity_golf.rs
+identity-golf = ["std", "rand/default"]
+# For examples/foca_insecure_udp_agent.rs
+agent = [
+ "std", "postcard-codec", "clap", "rand/default", "tokio",
+ "serde/std", "tracing/default", "tracing-subscriber"
+]
+
+[dependencies]
+rand = { version = "0.8", default-features = false }
+bytes = { version = "1", default-features = false }
+anyhow = { version = "1", default-features = false }
+
+serde = { version = "1", default-features = false, features = ["derive", "alloc"], optional = true }
+bincode = { version = "1", default-features = false, optional = true }
+postcard = { version = "0.7", default-features = false, optional = true }
+tracing = { version = "0.1", default-features = false, features = ["attributes"], optional = true }
+
+# Only used by examples:
+clap = { version = "2", default-features = false, optional = true }
+tokio = { version = "1", default-features = false, features = ["rt", "macros", "sync", "time", "net"], optional = true }
+tracing-subscriber = { version = "0.2", optional = true }
+
+[dev-dependencies]
+rand = { version = "0.8", default-features = false, features = ["small_rng"] }
+
+[[example]]
+name = "identity_golf"
+required-features = ["identity-golf"]
+
+[[example]]
+name = "foca_insecure_udp_agent"
+required-features = ["agent"]
Created LICENSE.txt
+Mozilla Public License Version 2.0
+
+1. Definitions
+
+ 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software.
+
+ 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution.
+
+ 1.3. "Contribution" means Covered Software of a particular Contributor.
+
+ 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof.
+
+ 1.5. "Incompatible With Secondary Licenses" means
+
+ (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License.
+
+ 1.6. "Executable Form" means any form of the work other than Source Code Form.
+
+ 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software.
+
+ 1.8. "License" means this document.
+
+ 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License.
+
+ 1.10. "Modifications" means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or
+
+ (b) any new file in Source Code Form that contains any Covered Software.
+
+ 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version.
+
+ 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses.
+
+ 1.13. "Source Code Form" means the form of the work preferred for making modifications.
+
+ 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity.
+
+2. License Grants and Conditions
+
+ 2.1. Grants
+ Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license:
+
+ (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and
+
+ (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version.
+
+ 2.2. Effective Date
+ The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution.
+
+ 2.3. Limitations on Grant Scope
+ The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor:
+
+ (a) for any code that a Contributor has removed from Covered Software; or
+
+ (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or
+
+ (c) under Patent Claims infringed by Covered Software in the absence of its Contributions.
+
+ This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4).
+
+ 2.4. Subsequent Licenses
+ No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3).
+
+ 2.5. Representation
+ Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License.
+
+ 2.6. Fair Use
+ This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents.
+
+ 2.7. Conditions
+ Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1.
+
+3. Responsibilities
+
+ 3.1. Distribution of Source Form
+ All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form.
+
+ 3.2. Distribution of Executable Form
+ If You distribute Covered Software in Executable Form then:
+
+ (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and
+
+ (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License.
+
+ 3.3. Distribution of a Larger Work
+ You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s).
+
+ 3.4. Notices
+ You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies.
+
+ 3.5. Application of Additional Terms
+ You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it.
+
+5. Termination
+
+ 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice.
+
+ 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate.
+
+ 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination.
+
+6. Disclaimer of Warranty
+Covered Software is provided under this License on an "as is" basis, without warranty of any kind, either expressed, implied, or statutory, including, without limitation, warranties that the Covered Software is free of defects, merchantable, fit for a particular purpose or non-infringing. The entire risk as to the quality and performance of the Covered Software is with You. Should any Covered Software prove defective in any respect, You (not any Contributor) assume the cost of any necessary servicing, repair, or correction. This disclaimer of warranty constitutes an essential part of this License. No use of any Covered Software is authorized under this License except under this disclaimer.
+
+7. Limitation of Liability
+Under no circumstances and under no legal theory, whether tort (including negligence), contract, or otherwise, shall any Contributor, or anyone who distributes Covered Software as permitted above, be liable to You for any direct, indirect, special, incidental, or consequential damages of any character including, without limitation, damages for lost profits, loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses, even if such party shall have been informed of the possibility of such damages. This limitation of liability shall not apply to liability for death or personal injury resulting from such party's negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You.
+
+8. Litigation
+Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims.
+
+9. Miscellaneous
+This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+
+ 10.1. New Versions
+ Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number.
+
+ 10.2. Effect of New Versions
+ You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward.
+
+ 10.3. Modified Versions
+ If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License).
+
+ 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
+ If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
Created README.md
+# Foca: Cluster membership discovery on your terms
+
+Foca is a building block for your gossip-based cluster discovery. It's
+a small `no_std` + `alloc` crate that implements the SWIM protocol along
+with its useful extensions (`SWIM+Inf.+Susp.`).
+
+Project:
+
+* Git Repository: https://github.com/caio/foca
+* Issue tracker: https://github.com/caio/foca/issues
+* CI: https://github.com/caio/foca/actions/workflows/ci.yml
+* Packages: https://crates.io/crates/foca
+* Documentation: https://docs.rs/foca
+
+
+# Introduction
+
+The most notable thing about Foca is the fact that it does almost
+nothing. Out of the box, all it gives is a reliable and efficient
+implementation of the [SWIM protocol][1] that's transport and
+identity agnostic.
+
+Knowledge of how SWIM works is helpful but not necessary to make use
+of this library. Reading the documentation for the `Message` enum
+should give you an idea of how the protocol works, but the paper is
+a very accessible read.
+
+Foca is designed to fit into any sort of transport: If your network
+allows peers to talk to each other you can deploy Foca on it.
+Not only the general bandwidth requirements are low, but you also
+have full control of how members identify each other (see
+`./examples/identity_golf.rs`) and how messages are encoded.
+
+
+# Usage
+
+Please take a look at `./examples/foca_insecure_udp_agent.rs`. It
+showcases what a simple tokio-based agent could look like and lets
+you actually run and see Foca swimming.
+
+~~~
+$ cargo run --features agent --example foca_insecure_udp_agent -- --help
+foca_insecure_udp_agent
+
+USAGE:
+ foca_insecure_udp_agent [OPTIONS] <BIND_ADDR>
+
+FLAGS:
+ -h, --help Prints help information
+ -V, --version Prints version information
+
+OPTIONS:
+ -a, --announce <announce> Address to another Foca instance to join with
+ -f, --filename <filename> Name of the file that will contain all active members
+ -i, --identity <identity> The address cluster members will use to talk to you.
+ Defaults to BIND_ADDR
+
+ARGS:
+ <BIND_ADDR> Socket address to bind to. Example: 127.0.0.1:8080
+~~~
+
+So you can start the agent in one terminal with
+`./foca_insecure_udp_agent 127.0.0.1:8000` and join it with as many others
+as you want by using a different `BIND_ADDR` and `--announce` to a
+running instance. Example:
+`./foca_insecure_udp_agent 127.0.0.1:8001 -a 127.0.0.1:8000`.
+
+The agent outputs some information to the console via [tracing][]'s
+subscriber. It defaults to the `INFO` log level and can be customized
+via the `RUST_LOG` environment variable using [tracing_subscriber's
+EnvFilter directives][dir].
+
+
+## Cargo Features
+
+Every feature is optional. The `default` set will always be empty.
+
+* `std`: Adds `std::error::Error` support and implements `foca::Identity`
+ for `std::net::SocketAddr*`.
+* `tracing`: Instruments Foca using the [tracing][] crate.
+* `serde`: Derives `Serialize` and `Deserialize` for Foca's public
+ types.
+* `bincode-codec`: Provides `BincodeCodec`, a serde-based codec type
+ that uses [bincode][] under the hood.
+* `postcard-codec`: Provides `PostcardCodec` a serde-based, `no_std`
+ friendly codec that uses [postcard][] under the hood.
+
+Only for examples:
+
+* `identity-golf`: For `./examples/identity_golf.rs`
+* `agent`: For `./examples/foca_insecure_udp_agent.rs`
+
+
+# Notes
+
+When writing this library, the main goal was having a simple and small
+core that's easy to test, simulate and reason about; It was mostly
+about getting a better understanding of the protocol after reading
+the paper.
+
+Sticking to these goals naturally led to an implementation that doesn't
+rely on many operating system features like a hardware clock, atomics
+and threads, so becoming a `no_std` crate (albeit still requiring heap
+allocations) was kind of a nice accidental feature that I decided to
+commit to.
+
+
+## Comparison to memberlist
+
+I avoided looking at [memberlist][2] until I was satisfied with my
+own implementation. Since then I did take a non-thorough look at it:
+
+* memberlist supports custom broadcasts, which is a very cool feature
+ for complex service discovery scenarios, so now Foca has support
+ for disseminating user data too (see `BroadcastHandler`
+ documentation) :-)
+
+* It has a stream-based synchronization mechanism (push-pull) that's
+ used for joining and periodic merging of state between members: It's
+ way beyond Foca's responsibilities, but it's a very interesting idea,
+ so I've exposed the `Foca::apply_many` method which enables code
+ using Foca to do a similar thing if desired.
+
+* Its configuration parameters change based on (current) cluster
+ size. It's super useful for a more plug-and-play experience, so
+ I want to introduce something along those lines in the future, likely
+ by pulling `Config` into Foca as a trait implementation.
+
+
+## Future
+
+Foca is very focused on doing almost nothing, so it's likely that
+some things will end up on separate crates. But, in no particular
+order, I want to:
+
+* Provide a more plug-and-play experience, closer to what memberlist
+ gives out of the box.
+
+* Make Foca run as a library for a higher level language. I'm not
+ even sure I can take it that far, so sounds like fun!
+
+* Deliver a (re)usable simulator. Right now I've been yolo-coding
+ one just to give me more confidence on what's implemented right
+ now, but what I want is something that you can: Slap your own
+ identity, codec and configuration; Set network parameters like
+ TTL, loss rate, bandwidth; And then simulate production behavior
+ (rolling restarts, partitions, etc) while watching convergence
+ stats. This is a ridiculous amount of work.
+
+* Actually demonstrate a running Foca with `no_std` constraints; I
+ don't have access to devices to play with at the moment, so
+ it's been difficult to find motivation to pursue this.
+
+
+# References
+
+* The paper [SWIM: Scalable Weakly-consistent Infection-style Process Group Membership
+Protocol][1]
+* HashiCorp's [memberlist][2]
+
+[1]: https://www.cs.cornell.edu/projects/Quicksilver/public_pdfs/SWIM.pdf
+[2]: https://github.com/hashicorp/memberlist
+[bincode]: https://github.com/bincode-org/bincode
+[postcard]: https://github.com/jamesmunns/postcard
+[tracing]: https://tracing.rs/
+[dir]: https://tracing.rs/tracing_subscriber/struct.EnvFilter.html#directives
+
+# License
+
+Unless explicitly stated otherwise, all work is subject to the terms
+of the Mozilla Public License, version 2.0.
+
+Files inside the `ensure_no_std_alloc/` directory are under the MIT
+license, matching the original work they were adapted from.
+
+Files inside the `examples/` directory are dedicated to the Public
+Domain.
Created ensure_no_std_alloc/Cargo.toml
+[package]
+name = "ensure_no_std"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+foca = { path = ".." }
+
+[profile.dev]
+panic = "abort"
+
+[profile.release]
+panic = "abort"
Created ensure_no_std_alloc/README
+This is adapted from:
+
+ https://blog.dbrgn.ch/2019/12/24/testing-for-no-std-compatibility/
+
+A little bit more involved because `alloc` is required.
+
+Running:
+
+ $ cargo +nightly rustc -- -C link-arg=-nostartfiles
Created ensure_no_std_alloc/src/main.rs
+#![no_std]
+#![no_main]
+#![feature(alloc_error_handler)]
+
+extern crate alloc;
+
+use core::{
+ alloc::{GlobalAlloc, Layout},
+ panic::PanicInfo,
+};
+
+#[allow(unused_imports)]
+use foca;
+
+#[global_allocator]
+static ALLOCATOR: Allocator = Allocator;
+
+struct Allocator;
+unsafe impl GlobalAlloc for Allocator {
+ unsafe fn alloc(&self, _: Layout) -> *mut u8 {
+ todo!()
+ }
+
+ unsafe fn dealloc(&self, _: *mut u8, _: Layout) {
+ todo!()
+ }
+}
+
+#[panic_handler]
+fn panic(_info: &PanicInfo) -> ! {
+ loop {}
+}
+
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+ loop {}
+}
+
+#[alloc_error_handler]
+fn alloc_fail(_: Layout) -> ! {
+ todo!();
+}
Created examples/foca_insecure_udp_agent.rs
+/* Any copyright is dedicated to the Public Domain.
+ * https://creativecommons.org/publicdomain/zero/1.0/ */
+use std::{
+ collections::HashMap, fs::File, io::Write, net::SocketAddr, path::Path, str::FromStr,
+ sync::Arc, time::Duration,
+};
+
+use bytes::{BufMut, Bytes, BytesMut};
+use clap::{App, Arg};
+use rand::{rngs::StdRng, SeedableRng};
+use tokio::{net::UdpSocket, sync::mpsc};
+
+use foca::{Config, Foca, Identity, Notification, PostcardCodec, Runtime, Timer};
+
+#[derive(Debug)]
+struct CliParams {
+ bind_addr: SocketAddr,
+ identity: ID,
+ announce_to: Option<ID>,
+ filename: String,
+}
+
+impl CliParams {
+ fn new() -> Self {
+ let matches = App::new("foca_insecure_udp_agent")
+ .arg(
+ Arg::with_name("BIND_ADDR")
+ .help("Socket address to bind to. Example: 127.0.0.1:8080")
+ .required(true)
+ .index(1),
+ )
+ .arg(
+ Arg::with_name("identity")
+ .help("The address cluster members will use to talk to you. Defaults to BIND_ADDR")
+ .takes_value(true)
+ .short("i")
+ .long("identity"),
+ )
+ .arg(
+ Arg::with_name("announce")
+ .help("Address to another Foca instance to join with")
+ .takes_value(true)
+ .short("a")
+ .long("announce"),
+ )
+ .arg(
+ Arg::with_name("filename")
+ .help("Name of the file that will contain all active members")
+ .takes_value(true)
+ .short("f")
+ .long("filename"),
+ )
+ .get_matches();
+
+ let bind_addr = SocketAddr::from_str(matches.value_of("BIND_ADDR").unwrap())
+ .expect("Invalid BIND_ADDR");
+
+ let identity = if let Some(identity) = matches.value_of("identity") {
+ let addr = SocketAddr::from_str(identity)
+ .expect("Failed to parse identity parameter as a socket address");
+ ID::new(addr)
+ } else {
+ ID::new(bind_addr)
+ };
+
+ let announce_to = matches.value_of("announce").map(|param| {
+ let addr = SocketAddr::from_str(param)
+ .expect("Failed to parse announce parameter as a socket address");
+ ID::new(addr)
+ });
+
+ let filename = matches
+ .value_of("filename")
+ .map(String::from)
+ .unwrap_or_else(|| format!("foca_cluster_members.{}.txt", rand::random::<u64>()));
+
+ Self {
+ bind_addr,
+ identity,
+ announce_to,
+ filename,
+ }
+ }
+}
+
+#[derive(Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
+struct ID {
+ addr: SocketAddr,
+ // An extra field to allow fast rejoin
+ bump: u16,
+}
+
+// We implement a custom, simpler Debug format just to make the tracing
+// output cuter
+impl std::fmt::Debug for ID {
+ fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ formatter.debug_tuple("ID").field(&self.addr).finish()
+ }
+}
+
+impl ID {
+ fn new(addr: SocketAddr) -> Self {
+ Self {
+ addr,
+ bump: rand::random(),
+ }
+ }
+}
+
+impl Identity for ID {
+ // Since a client outside the cluster will not be aware of our
+ // `bump` field, we implement the optional trait method
+ // `has_same_prefix` to allow anyone that knows our `addr`
+ // to join our cluster.
+ fn has_same_prefix(&self, other: &Self) -> bool {
+ self.addr.eq(&other.addr)
+ }
+
+ // And by implementing `renew` we enable automatic rejoining:
+ // when another member declares us as down, Foca immediately
+ // switches to this new identity and rejoins the cluster for us
+ fn renew(&self) -> Option<Self> {
+ Some(Self {
+ addr: self.addr,
+ bump: self.bump.wrapping_add(1),
+ })
+ }
+}
+
+struct AccumulatingRuntime<T> {
+ pub to_send: Vec<(T, Bytes)>,
+ pub to_schedule: Vec<(Duration, Timer<T>)>,
+ pub notifications: Vec<Notification<T>>,
+ buf: BytesMut,
+}
+
+impl<T: Identity> Runtime<T> for AccumulatingRuntime<T> {
+ // Notice that we'll interact to these via pop(), so we're taking
+ // them in reverse order of when it happened.
+ // That's perfectly fine, the order of items from a single interaction
+ // is irrelevant. A "nicer" implementation could use VecDeque or
+ // react directly here instead of accumulating.
+
+ fn notify(&mut self, notification: Notification<T>) {
+ self.notifications.push(notification);
+ }
+
+ fn send_to(&mut self, to: T, data: &[u8]) {
+ let mut packet = self.buf.split();
+ packet.put_slice(data);
+ self.to_send.push((to, packet.freeze()));
+ }
+
+ fn submit_after(&mut self, event: Timer<T>, after: Duration) {
+ // We could spawn+sleep here
+ self.to_schedule.push((after, event));
+ }
+}
+
+impl<T> AccumulatingRuntime<T> {
+ pub fn new() -> Self {
+ Self {
+ to_send: Vec::new(),
+ to_schedule: Vec::new(),
+ notifications: Vec::new(),
+ buf: BytesMut::new(),
+ }
+ }
+
+ pub fn backlog(&self) -> usize {
+ self.to_send.len() + self.to_schedule.len() + self.notifications.len()
+ }
+}
+
+// Our identity is a composite of a socket address and extra
+// stuff, but downstream consumers likely only care about
+// the address part.
+//
+// It's perfectly valid to temporarily have more than one member
+// pointing at the same address (with a different `bump`): one
+// could, for example: join the cluster, ^C the program and
+// immediatelly join again. Before Foca detects that the previous
+// identity is down we'll receive a notification about this new
+// identity going up.
+//
+// So what we maintain here is a HashMap of addresses to an
+// occurence count:
+//
+// * The count will most of the time be 1;
+// * But in scenarios like above it may reach 2. Meaning:
+// something made the address change identities, but
+// it's still active
+// * And when the count reaches 0 the address is actually
+// down, so we remove it
+//
+struct Members(HashMap<SocketAddr, u8>);
+
+impl Members {
+ fn new() -> Self {
+ Self(HashMap::new())
+ }
+
+ // A result of `true` means that the effective list of
+ // cluster member addresses has changed
+ fn add_member(&mut self, member: ID) -> bool {
+ // Notice how we don't care at all about the `bump` part.
+ // It's only useful for Foca.
+ let counter = self.0.entry(member.addr).or_insert(0);
+
+ *counter += 1;
+
+ counter == &1
+ }
+
+ // A result of `true` means that the effective list of
+ // cluster member addresses has changed
+ fn remove_member(&mut self, member: ID) -> bool {
+ let effectivelly_down = if let Some(counter) = self.0.get_mut(&member.addr) {
+ *counter -= 1;
+
+ counter == &0
+ } else {
+ // Shouldn't happen
+ false
+ };
+
+ if effectivelly_down {
+ self.0.remove(&member.addr);
+ }
+
+ effectivelly_down
+ }
+
+ fn addrs(&self) -> impl Iterator<Item = &SocketAddr> {
+ self.0.keys()
+ }
+}
+
// Atomically-ish publish the current member list: write a `.new`
// sibling first, keep the previous list as `.old`, then rename the
// fresh file into place so readers never see a half-written list.
fn do_the_file_replace_dance<'a>(
    fname: &str,
    addrs: impl Iterator<Item = &'a SocketAddr>,
) -> std::io::Result<()> {
    // Shirley, there's a more hygienic way of doing all this

    let tmp_fname = format!("{}.new", fname);

    let mut tmp = File::create(&tmp_fname)?;
    for addr in addrs {
        writeln!(&mut tmp, "{}", addr)?;
    }
    // Flush to disk and close the handle *before* renaming:
    // renaming a file with a live open handle fails on some
    // platforms (Windows) and an unsynced file may lose data
    // if the machine dies right after the rename
    tmp.sync_all()?;
    drop(tmp);

    let dst = Path::new(fname);
    if dst.exists() {
        let old_fname = format!("{}.old", fname);
        std::fs::rename(dst, Path::new(&old_fname))?;
    }

    std::fs::rename(Path::new(&tmp_fname), Path::new(fname))?;

    Ok(())
}
+
/// Agent entry point: wires Foca to a UDP socket, a timer scheduler
/// and a member-list file using plain tokio channels.
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), anyhow::Error> {
    let params = CliParams::new();

    // It's really confusing how fmt().init() and fmt::init()
    // behave differently, but hey...
    // When RUST_LOG is unset, default to Level::INFO
    if std::env::var("RUST_LOG").is_err() {
        tracing_subscriber::fmt().init();
    } else {
        // Else use whatever it is set to, which defaults to Level::ERROR
        tracing_subscriber::fmt::init();
    }

    tracing::info!(?params, "Started");

    let CliParams {
        bind_addr,
        identity,
        announce_to,
        filename,
    } = params;

    let rng = StdRng::from_entropy();
    let config = Config::simple();

    // Receive buffer sized to the largest packet Foca will accept
    let buf_len = config.max_packet_size.get();
    let mut recv_buf = vec![0u8; buf_len];

    let mut foca = Foca::new(identity, config, rng, PostcardCodec);
    let socket = Arc::new(UdpSocket::bind(bind_addr).await?);

    // We'll create a task responsible to sending data through the
    // socket.
    // These are what we use to communicate with it
    let (tx_send_data, mut rx_send_data) = mpsc::channel::<(SocketAddr, Bytes)>(100);
    // The socket writing task
    let write_socket = Arc::clone(&socket);
    tokio::spawn(async move {
        while let Some((dst, data)) = rx_send_data.recv().await {
            // A more reasonable implementation would do some more stuff
            // here before sending, like:
            //  * zlib or something else to compress the data
            //  * encryption (shared key, AES most likely)
            //  * an envelope with tag+version+checksum to allow
            //    protocol evolution
            let _ignored_send_result = write_socket.send_to(&data, &dst).await;
        }
    });

    // We'll also launch a task to manage Foca. Since there are timers
    // involved, one simple way to do it is unifying the input:
    enum Input<T> {
        // A timer Foca previously asked us to schedule has fired
        Event(Timer<T>),
        // A raw packet arrived on the socket
        Data(Bytes),
        // Operator asked us to join the cluster via this member
        Announce(T),
    }
    // And communicating via channels
    let (tx_foca, mut rx_foca) = mpsc::channel(100);
    // Another alternative would be putting a Lock around Foca, but
    // yours truly likes to hide behind (the lock inside) channels
    // instead.
    let mut runtime = AccumulatingRuntime::new();
    let mut members = Members::new();
    let tx_foca_copy = tx_foca.clone();
    tokio::spawn(async move {
        while let Some(input) = rx_foca.recv().await {
            // The runtime must have been fully drained after the
            // previous interaction
            debug_assert_eq!(0, runtime.backlog());

            let result = match input {
                Input::Event(timer) => foca.handle_timer(timer, &mut runtime),
                Input::Data(data) => foca.handle_data(&data, &mut runtime),
                Input::Announce(dst) => foca.announce(dst, &mut runtime),
            };

            // Every public foca result yields `()` on success, so there's
            // nothing to do with Ok
            if let Err(error) = result {
                // And we'd decide what to do with each error, but Foca
                // is pretty tolerant so we just log them and pretend
                // all is fine
                eprintln!("Ignored Error: {}", error);
            }

            // Now we react to what happened.
            // This is how we enable async: buffer one single interaction
            // and then drain the runtime.

            // First we submit everything that needs to go to the network
            while let Some((dst, data)) = runtime.to_send.pop() {
                // ToSocketAddrs would be the fancy thing to use here
                let _ignored_send_result = tx_send_data.send((dst.addr, data)).await;
            }

            // Then schedule what needs to be scheduled
            while let Some((delay, event)) = runtime.to_schedule.pop() {
                let own_input_handle = tx_foca_copy.clone();
                tokio::spawn(async move {
                    tokio::time::sleep(delay).await;
                    let _ignored_send_error = own_input_handle.send(Input::Event(event)).await;
                });
            }

            // And finally react to notifications.
            //
            // Here we could do smarter things to keep other actors in
            // the system up-to-date with the cluster state.
            // We could, for example:
            //
            //  * Have a broadcast channel where we submit the MemberUp
            //    and MemberDown notifications to everyone and each one
            //    keeps a lock-free version of the list
            //
            //  * Update a shared/locked Vec that every consumer has
            //    read access
            //
            // But since this is an agent, we simply write to a file
            // so other processes periodically open()/read()/close()
            // to figure out the cluster members.
            let mut active_list_has_changed = false;
            while let Some(notification) = runtime.notifications.pop() {
                match notification {
                    Notification::MemberUp(id) => {
                        tracing::info!(?id, "Member Up");
                        active_list_has_changed |= members.add_member(id)
                    }
                    Notification::MemberDown(id) => {
                        tracing::info!(?id, "Member Down");
                        active_list_has_changed |= members.remove_member(id)
                    }

                    other => {
                        tracing::debug!(notification = ?other, "Unhandled")
                    }
                }
            }

            // Only touch the filesystem when the set of addresses
            // actually changed
            if active_list_has_changed {
                do_the_file_replace_dance(&filename, members.addrs())
                    .expect("Can write the file alright");
            }
        }
    });

    // Foca is running, we can tell it to announce to our target
    if let Some(dst) = announce_to {
        let _ignored_send_error = tx_foca.send(Input::Announce(dst)).await;
    }

    // And finally, we receive forever
    let mut databuf = BytesMut::new();
    loop {
        let (len, _from_addr) = socket.recv_from(&mut recv_buf).await?;
        // Accordingly, we would undo everything that's done prior to
        // sending: decompress, decrypt, remove the envelope
        databuf.put_slice(&recv_buf[..len]);
        // And simply forward it to foca
        let _ignored_send_error = tx_foca.send(Input::Data(databuf.split().freeze())).await;
    }
}
Created examples/identity_golf.rs
+/* Any copyright is dedicated to the Public Domain.
+ * https://creativecommons.org/publicdomain/zero/1.0/ */
+// NOTE: This is intended to be read from the top to the bottom,
+// literate style, as you would normally read text.
+#![allow(dead_code)]
+use foca::Identity;
+
fn main() {
    // Foca (with the `std` feature) already gives us a very simple Identity
    // implementation for the socket address types. So we could use just
    // that:
    use std::net::{Ipv4Addr, SocketAddrV4};

    let basic_identity = SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 8080);

    // But it's so basic that we won't even be able to rejoin a cluster
    // quickly:
    assert_eq!(None, basic_identity.renew());

    // It's very desirable to rejoin fast: we want to recover from false
    // positives fast and, most importantly, if we need to restart the
    // application, we don't want to wait a long while to be able to
    // join the cluster again. So let's add some metadata that we can
    // control:

    #[derive(Clone, Debug, PartialEq, Eq)]
    struct FatIdentity {
        addr: SocketAddrV4,
        extra: u16,
    }

    impl From<SocketAddrV4> for FatIdentity {
        fn from(addr: SocketAddrV4) -> Self {
            Self {
                addr,
                // We initialize with random() instead of zero to add
                // some unpredictability to it
                extra: rand::random(),
            }
        }
    }

    impl Identity for FatIdentity {
        // We want fast rejoins, so we simply bump the extra field
        // maintaining the actual network address intact
        fn renew(&self) -> Option<Self> {
            Some(Self {
                addr: self.addr,
                extra: self.extra.wrapping_add(1),
            })
        }

        // And we ensure that members can Announce to us without
        // knowing our (randomized) extra field
        fn has_same_prefix(&self, other: &Self) -> bool {
            self.addr.eq(&other.addr)
        }
    }

    // So now Foca will happily attempt to rejoin a cluster
    // for us as soon as it figures we're Down
    assert!(FatIdentity::from(basic_identity).renew().is_some());

    // But now our identities have increased in size and we haven't even
    // started adding interesting data to it. Let's shrink it a bit:
    //
    // It's very likely that you won't be binding to a random port on
    // startup- Often you'll have to use a specific port, dictated by
    // whoever operates the network, so why send this 16-bit number
    // when we know exactly what it is?
    //
    // We may also be in a situation that the IP addresses are repeated
    // all the time: say, we're operating in LAN context and they
    // are always something like 192.168.X.Y

    #[derive(Clone, Debug, PartialEq, Eq)]
    struct SubnetFixedPortId {
        // Only the last two octets of the (assumed 192.168.X.Y) address
        addr: (u8, u8),
        extra: u16,
    }

    impl From<Ipv4Addr> for SubnetFixedPortId {
        fn from(src: Ipv4Addr) -> Self {
            let octets = src.octets();
            Self {
                addr: (octets[2], octets[3]),
                extra: rand::random(),
            }
        }
    }

    // We can trivially transform this back into a socket address:
    impl SubnetFixedPortId {
        const PORTNR: u16 = 8080;

        pub fn as_socket_addr_v4(&self) -> SocketAddrV4 {
            SocketAddrV4::new(
                Ipv4Addr::new(192, 168, self.addr.0, self.addr.1),
                Self::PORTNR,
            )
        }
    }

    // And implementing identity is as trivial as it always is:
    impl Identity for SubnetFixedPortId {
        fn renew(&self) -> Option<Self> {
            Some(Self {
                addr: self.addr,
                extra: self.extra.wrapping_add(1),
            })
        }

        // And we ensure that members can Announce to us without
        // knowing our (randomized) extra field
        fn has_same_prefix(&self, other: &Self) -> bool {
            self.addr.eq(&other.addr)
        }
    }

    // We'll stop golfing here, but it can be taken very far:
    //
    //  * It's very common to have a "primary key" for every
    //    computer in the data center, meaning that you can
    //    have a `HashMap<u8, SocketAddress>` lying in memory
    //    somewhere, use `u8` as the main identifier and bask
    //    at your glorious tiny id.
    //
    //  * Nowadays even tiny shops are going all-in on micro-services
    //    so maybe `u8` is not large enough... Then use `u16`, pack
    //    lookup data on 10bits, use the remaining as an extra random
    //    field for auto-rejoining!
    //
    // The best part of being able to minimize the byte size of
    // your identity is that you get more freedom to enrich it with
    // host-based metadata: shard_id, data_snapshot_version,
    // deployment_version - Stuff relevant information there and
    // you can avoid going all-in on real service discovery for
    // as long as your architecture is sane.
}
Created src/broadcast.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use alloc::vec::Vec;
+use core::{cmp::Ordering, fmt};
+
+use bytes::{Buf, BufMut};
+
/// A type capable of decoding a (associated) broadcast from a buffer
/// and deciding whether to keep disseminating it for other members
/// of the cluster (when it's new information) or to discard it (when
/// it's outdated/stale).
pub trait BroadcastHandler {
    /// Concrete type that will be disseminated to all cluster members.
    ///
    /// It should be able to compare itself against an arbitrary number
    /// of other [`Self::Broadcast`] instances and decide whether it
    /// replaces it or not so conflicting/stale information isn't
    /// disseminated.
    ///
    /// The `AsRef<[u8]>` part is what gets sent over the wire, which
    /// [`Self::receive_item`] is supposed to decode.
    type Broadcast: Invalidates + AsRef<[u8]>;

    /// The error type that `receive_item` may emit. Will be wrapped
    /// by [`crate::Error`].
    type Error: fmt::Debug + fmt::Display + Send + Sync + 'static;

    /// Decodes a [`Self::Broadcast`] from a buffer and either discards
    /// it or tells Foca to persist and disseminate it.
    ///
    /// When you receive a broadcast you have to decide whether it's
    /// new information that needs to be disseminated (`Ok(Some(...))`)
    /// or not (`Ok(None)`).
    ///
    /// Always yielding `Some(...)` is wrong because Foca will never
    /// know when to stop sending this information to other members.
    ///
    /// Example: Assume your custom broadcast is a simple Set-Key-Value
    /// operation. When you receive it you should check if your map
    /// contains the Key-Value pair; If it didn't, you yield
    /// `Some`, otherwise the operation is stale, so you yield `None`.
    ///
    /// Implementations MUST read a single [`Self::Broadcast`] from the
    /// buffer and advance the cursor accordingly.
    ///
    /// Implementations may assume the data in the buffer is contiguous.
    fn receive_item(&mut self, data: impl Buf) -> Result<Option<Self::Broadcast>, Self::Error>;
}
+
/// A type that's able to look at another and decide whether it's
/// newer/fresher (i.e. invalidates) than it.
///
/// As you send/receive broadcasts, Foca will hold them for a while
/// as it disseminates the data to other cluster members. This trait
/// helps with replacing data that hasn't been fully disseminated
/// yet but you already know it's stale.
///
/// Example: Assume a versioned broadcast like `{key,version,...}`;
/// After you receive `{K, 0, ...}` and keep it, Foca will be
/// disseminating it. Soon after you receive `{K, 1, ...}` which
/// is a newer version for this broadcast. This trait enables
/// Foca to immediately stop disseminating the previous version,
/// even if it hasn't sent it to everyone it can yet.
pub trait Invalidates {
    /// When `item.invalidates(&other)` it means that Foca will
    /// keep `item` and discard `other` from its dissemination
    /// backlog.
    fn invalidates(&self, other: &Self) -> bool;
}

// For plain byte slices an exact duplicate "invalidates" the queued
// copy: re-adding the same payload replaces (and thus refreshes) it
// instead of queueing it twice.
// Note: the `'a` lifetime parameter the original carried is elided
// per Rust 2018 idioms.
impl Invalidates for &[u8] {
    fn invalidates(&self, other: &Self) -> bool {
        self.eq(other)
    }
}
+
// The dissemination backlog: every queued broadcast paired with how
// many more times it may still be transmitted.
pub(crate) struct Broadcasts<V> {
    // Transmission budget given to each newly added entry
    max_tx: usize,
    // Kept sorted ascending by `Entry`'s ordering (remaining_tx,
    // then payload length) — see `fill` for why that matters
    storage: Vec<Entry<V>>,
}
+
impl<T> Broadcasts<T>
where
    T: Invalidates + AsRef<[u8]>,
{
    /// Creates an empty backlog where each item will be transmitted
    /// at most `max_tx` times.
    pub fn new(max_tx: usize) -> Self {
        Self {
            storage: Vec::new(),
            max_tx,
        }
    }

    /// Number of broadcasts currently queued for dissemination.
    pub fn len(&self) -> usize {
        self.storage.len()
    }

    /// Queues `value` with a full transmission budget, dropping any
    /// previously queued item that `value` invalidates.
    pub fn add_or_replace(&mut self, value: T) {
        let new_node = Entry {
            remaining_tx: self.max_tx,
            value,
        };

        // Can I be smarter here?
        // A linear scan; at most one existing entry should match
        // since this same check runs on every insertion
        if let Some(position) = self
            .storage
            .iter()
            .position(|node| new_node.value.invalidates(&node.value))
        {
            self.storage.remove(position);
        }

        // Find where to insert whilst keeping the storage sorted
        // Searching from the right may be better since there is a
        // bound and default value for `remaining_tx`
        let position = self
            .storage
            .binary_search(&new_node)
            .unwrap_or_else(|pos| pos);
        self.storage.insert(position, new_node);
    }

    /// Writes up to `max_items` queued broadcasts into `buffer` and
    /// returns how many were written.
    ///
    /// Each written item has its transmission budget decremented;
    /// items on their last transmission are removed from the queue.
    pub fn fill(&mut self, mut buffer: impl BufMut, max_items: usize) -> usize {
        if self.storage.is_empty() {
            return 0;
        }

        let mut num_taken = 0;
        let mut num_removed = 0;
        let starting_len = self.storage.len();
        let mut remaining = max_items;

        // We fill the buffer giving priority to the largest
        // least sent items.
        // Storage is sorted ascending, so walking from the right
        // visits the highest-budget (then longest) entries first.
        for idx in (0..starting_len).rev() {
            if !buffer.has_remaining_mut() || remaining == 0 {
                break;
            }

            let node = &mut self.storage[idx];
            let value_len = node.value.as_ref().len();
            debug_assert!(node.remaining_tx > 0);

            // Items that don't fit are skipped, not removed: a later
            // (smaller) item may still fit in the leftover space
            if buffer.remaining_mut() >= value_len {
                num_taken += 1;
                remaining -= 1;

                buffer.put_slice(node.value.as_ref());

                if node.remaining_tx == 1 {
                    // Last transmission, gotta remove the node.
                    // It's ok to swap_remove because we're walking
                    // the storage from the right to the left
                    self.storage.swap_remove(idx);
                    num_removed += 1;
                } else {
                    node.remaining_tx -= 1;
                }
            }
        }

        if num_removed > 0 {
            // NOTE(review): swap_remove already shrank the vec by
            // one per removal, so this truncate looks like a no-op
            // kept for safety — confirm before relying on it
            self.storage.truncate(starting_len - num_removed);
        }

        // XXX Any other easy "bail out" scenario?
        let skip_resort = {
            // If we took all the nodes without removing any
            // (every budget dropped by one: relative order is intact)
            (num_taken == starting_len && num_removed == 0)
            // Or ignored them all
            || num_taken == 0
        };

        if !skip_resort {
            self.storage.sort_unstable();
        }

        debug_assert!(!skip_resort || self.is_sorted());

        num_taken
    }

    /// True when storage is sorted ascending by remaining budget.
    /// Used by debug assertions to validate `fill`'s skip-resort
    /// shortcut.
    pub fn is_sorted(&self) -> bool {
        // Future: `is_sorted` from https://github.com/rust-lang/rfcs/pull/2351
        self.storage[..]
            .windows(2)
            .all(|w| w[0].remaining_tx <= w[1].remaining_tx)
    }
}
+
/// A queued broadcast plus its remaining transmission budget.
#[derive(Debug, Clone)]
struct Entry<T> {
    remaining_tx: usize,
    value: T,
}

// NOTE: Equality (and the ordering below) deliberately looks only at
// `remaining_tx` and the payload *length*, never its contents: two
// entries with different bytes but equal budget and length compare
// as equal. That is all `Broadcasts` needs to keep storage sorted.
impl<T: AsRef<[u8]>> PartialEq for Entry<T> {
    fn eq(&self, other: &Self) -> bool {
        self.remaining_tx == other.remaining_tx
            && self.value.as_ref().len() == other.value.as_ref().len()
    }
}

impl<T: AsRef<[u8]>> Eq for Entry<T> {}

impl<T: AsRef<[u8]>> Ord for Entry<T> {
    fn cmp(&self, other: &Self) -> Ordering {
        // Primary key: remaining transmissions; ties broken by
        // payload length. `then_with` evaluates the tie-breaker
        // lazily, only when the primary comparison is Equal.
        self.remaining_tx
            .cmp(&other.remaining_tx)
            .then_with(|| self.value.as_ref().len().cmp(&other.value.as_ref().len()))
    }
}

impl<T: AsRef<[u8]>> PartialOrd for Entry<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
+
#[cfg(test)]
impl<T> Broadcasts<T> {
    // Test-only convenience: true when nothing is queued
    pub fn is_empty(&self) -> bool {
        self.storage.is_empty()
    }
}
+
#[cfg(test)]
mod tests {

    use super::*;

    // Test helper payload: the first two bytes act as the
    // invalidation key, so same-key values replace each other
    struct TwoByteKey(Vec<u8>);

    impl TwoByteKey {
        fn new(data: impl AsRef<[u8]>) -> Self {
            assert!(
                data.as_ref().len() > 2,
                "first two bytes are used as key for invalidation"
            );
            Self(Vec::from(data.as_ref()))
        }
    }

    impl Invalidates for TwoByteKey {
        fn invalidates(&self, other: &Self) -> bool {
            self.0[..2] == other.0[..2]
        }
    }

    impl AsRef<[u8]> for TwoByteKey {
        fn as_ref(&self) -> &[u8] {
            self.0.as_ref()
        }
    }

    #[test]
    fn piggyback_behaviour() {
        let max_tx = 5;
        let mut piggyback = Broadcasts::new(max_tx);

        assert!(piggyback.is_empty(), "Piggyback starts empty");

        piggyback.add_or_replace(TwoByteKey::new(b"AAabc"));

        assert_eq!(1, piggyback.len());

        // Same "AA" key: should replace the previous item
        piggyback.add_or_replace(TwoByteKey::new(b"AAcba"));

        assert_eq!(
            1,
            piggyback.len(),
            "add_or_replace with same key should replace"
        );

        let mut buf = Vec::new();

        for _i in 0..max_tx {
            buf.clear();
            let num_items = piggyback.fill(&mut buf, usize::MAX);
            assert_eq!(1, num_items);
            assert_eq!(
                b"AAcba",
                &buf[..],
                "Should transmit an item at most max_tx times"
            );
        }

        assert!(
            piggyback.is_empty(),
            "Should remove item after being used max_tx times"
        );
    }

    #[test]
    fn fill_does_nothing_if_buffer_full() {
        let mut piggyback = Broadcasts::new(1);
        piggyback.add_or_replace(TwoByteKey::new(b"a super long value"));

        // A 5-byte limit is too small for the queued item, so fill
        // must neither write nor consume the item's budget
        let buf = bytes::BytesMut::new();
        let mut limited = buf.limit(5);

        let num_items = piggyback.fill(&mut limited, usize::MAX);

        assert_eq!(0, num_items);
        assert_eq!(5, limited.remaining_mut());
        assert_eq!(1, piggyback.len());
    }

    #[test]
    fn piggyback_consumes_largest_first() {
        let mut piggyback = Broadcasts::new(10);

        piggyback.add_or_replace(TwoByteKey::new("00hi".as_bytes()));
        piggyback.add_or_replace(TwoByteKey::new("01hello".as_bytes()));
        piggyback.add_or_replace(TwoByteKey::new("02hey".as_bytes()));

        let mut buf = Vec::new();
        let num_items = piggyback.fill(&mut buf, usize::MAX);

        // All fresh items share the same budget, so they come out
        // ordered by payload length, largest first
        assert_eq!(3, num_items);
        assert_eq!("01hello02hey00hi".as_bytes(), &buf[..]);
    }

    #[test]
    fn piggyback_respects_limit() {
        let mut piggyback = Broadcasts::new(10);

        piggyback.add_or_replace(TwoByteKey::new(b"foo"));
        piggyback.add_or_replace(TwoByteKey::new(b"bar"));
        piggyback.add_or_replace(TwoByteKey::new(b"baz"));

        let mut buf = Vec::new();
        let num_items = piggyback.fill(&mut buf, 0);

        assert_eq!(0, num_items);
        assert!(buf.is_empty());

        let num_items = piggyback.fill(&mut buf, 2);
        assert_eq!(2, num_items);
    }
}
Created src/codec.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use core::fmt;
+
+use bytes::{Buf, BufMut};
+
+use crate::{Header, Member};
+
+#[cfg(feature = "bincode-codec")]
+pub mod bincode_impl;
+
+#[cfg(feature = "postcard-codec")]
+pub mod postcard_impl;
+
/// A Codec is responsible for encoding and decoding the data that
/// is sent between cluster members.
///
/// So you can paint your bike shed however you like.
pub trait Codec<T> {
    /// The codec error type. Will be wrapped by [`crate::Error`].
    type Error: fmt::Debug + fmt::Display + Send + Sync + 'static;

    /// Encodes a `foca::Header` into the given buffer.
    fn encode_header(&mut self, header: &Header<T>, buf: impl BufMut) -> Result<(), Self::Error>;

    /// Decode a [`Header`] from the given buffer.
    ///
    /// Implementations MUST read a single item from the buffer and
    /// advance the cursor accordingly.
    ///
    /// Implementations may assume the data in the buffer is contiguous.
    fn decode_header(&mut self, buf: impl Buf) -> Result<Header<T>, Self::Error>;

    /// Encodes a [`Member`] into the given buffer.
    ///
    /// Implementations MUST NOT leave the buffer dirty when there's
    /// not enough space to encode the item.
    fn encode_member(&mut self, member: &Member<T>, buf: impl BufMut) -> Result<(), Self::Error>;

    /// Decode a [`Member`] from the given buffer.
    ///
    /// Implementations MUST read a single item from the buffer and
    /// advance the cursor accordingly.
    ///
    /// Implementations may assume the data in the buffer is contiguous.
    fn decode_member(&mut self, buf: impl Buf) -> Result<Member<T>, Self::Error>;
}
+
// Blanket implementation so a `&mut C` can be used wherever a
// `Codec` is expected: callers may lend their codec instead of
// handing over ownership. Every method simply delegates to `C`.
impl<'a, C, T> Codec<T> for &'a mut C
where
    C: Codec<T>,
{
    type Error = C::Error;

    fn encode_header(&mut self, header: &Header<T>, buf: impl BufMut) -> Result<(), Self::Error> {
        C::encode_header(self, header, buf)
    }

    fn decode_header(&mut self, buf: impl Buf) -> Result<Header<T>, Self::Error> {
        C::decode_header(self, buf)
    }

    fn encode_member(&mut self, member: &Member<T>, buf: impl BufMut) -> Result<(), Self::Error> {
        C::encode_member(self, member, buf)
    }

    fn decode_member(&mut self, buf: impl Buf) -> Result<Member<T>, Self::Error> {
        C::decode_member(self, buf)
    }
}
Created src/codec/bincode_impl.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use crate::{Codec, Header, Member};
+
/// BincodeCodec encodes/decodes messages using [`bincode`].
///
/// The wrapped [`bincode::Options`] value decides the exact wire
/// format (integer encoding, size limits, endianness).
///
/// This struct simply wraps a [`bincode::Options`] type:
///
/// ~~~rust
/// let codec =
///     foca::BincodeCodec(bincode::DefaultOptions::new());
/// ~~~
#[derive(Debug, Clone, Copy)]
pub struct BincodeCodec<O: bincode::Options>(pub O);
+
impl<T, O> Codec<T> for BincodeCodec<O>
where
    T: serde::Serialize + for<'de> serde::Deserialize<'de>,
    O: bincode::Options + Copy,
{
    type Error = bincode::Error;

    // Every method delegates straight to bincode, adapting the
    // bytes Buf/BufMut types via their `reader()`/`writer()` shims

    fn encode_header(
        &mut self,
        payload: &Header<T>,
        buf: impl bytes::BufMut,
    ) -> Result<(), Self::Error> {
        self.0.serialize_into(buf.writer(), payload)
    }

    fn decode_header(&mut self, buf: impl bytes::Buf) -> Result<Header<T>, Self::Error> {
        self.0.deserialize_from(buf.reader())
    }

    fn encode_member(
        &mut self,
        member: &Member<T>,
        buf: impl bytes::BufMut,
    ) -> Result<(), Self::Error> {
        self.0.serialize_into(buf.writer(), member)
    }

    fn decode_member(&mut self, buf: impl bytes::Buf) -> Result<Member<T>, Self::Error> {
        self.0.deserialize_from(buf.reader())
    }
}
+
#[cfg(test)]
mod tests {
    use super::BincodeCodec;

    // Round-trips headers and members through the shared testing
    // helper to ensure encode/decode are inverses
    #[test]
    fn bincode_roundtrip() -> Result<(), bincode::Error> {
        crate::testing::verify_codec_roundtrip(BincodeCodec(bincode::DefaultOptions::new()))
    }
}
Created src/codec/postcard_impl.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use bytes::{Buf, BufMut};
+use postcard::flavors::SerFlavor;
+
+use crate::{Codec, Header, Member};
+
/// PostcardCodec encodes/decodes packets using [`postcard`].
///
/// It's a zero-sized unit struct: postcard needs no configuration.
#[derive(Debug, Clone, Copy)]
pub struct PostcardCodec;
+
+// XXX We can use Buf::chunk here because Foca guarantees the buffer is
+// contiguous... Maybe a marker trait like `trait ContiguousBuf: Buf {}`
+// or some other form of making it explicit in the type would be
+// helpful?
+impl<T> Codec<T> for PostcardCodec
+where
+ T: serde::Serialize + for<'de> serde::Deserialize<'de>,
+{
+ type Error = postcard::Error;
+
+ fn encode_header(&mut self, payload: &Header<T>, buf: impl BufMut) -> Result<(), Self::Error> {
+ postcard::serialize_with_flavor(payload, WrappedBuf(buf))
+ }
+
+ fn decode_header(&mut self, mut buf: impl Buf) -> Result<Header<T>, Self::Error> {
+ let remaining = buf.remaining();
+ debug_assert_eq!(remaining, buf.chunk().len());
+ let (payload, rest) = postcard::take_from_bytes(buf.chunk())?;
+ let after = rest.len();
+ buf.advance(remaining - after);
+ Ok(payload)
+ }
+
+ fn encode_member(&mut self, member: &Member<T>, buf: impl BufMut) -> Result<(), Self::Error> {
+ postcard::serialize_with_flavor(member, WrappedBuf(buf))
+ }
+
+ fn decode_member(&mut self, mut buf: impl Buf) -> Result<Member<T>, Self::Error> {
+ let remaining = buf.remaining();
+ debug_assert_eq!(remaining, buf.chunk().len());
+ let (member, rest) = postcard::take_from_bytes(buf.chunk())?;
+ let after = rest.remaining();
+ buf.advance(remaining - after);
+ Ok(member)
+ }
+}
+
+struct WrappedBuf<B>(B);
+
+impl<B: BufMut> SerFlavor for WrappedBuf<B> {
+ type Output = ();
+
+ fn try_push(&mut self, data: u8) -> Result<(), ()> {
+ if self.0.has_remaining_mut() {
+ self.0.put_u8(data);
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+
+ fn release(self) -> Result<Self::Output, ()> {
+ Ok(())
+ }
+
+ fn try_extend(&mut self, data: &[u8]) -> Result<(), ()> {
+ if self.0.remaining_mut() >= data.len() {
+ self.0.put_slice(data);
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+}
+
#[cfg(test)]
mod test {
    use super::PostcardCodec;

    // Round-trips headers and members through the shared testing
    // helper to ensure encode/decode are inverses
    #[test]
    fn postcard_roundtrip() -> Result<(), postcard::Error> {
        crate::testing::verify_codec_roundtrip(PostcardCodec)
    }
}
Created src/config.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use core::{
+ num::{NonZeroU8, NonZeroUsize},
+ time::Duration,
+};
+
/// A Config specifies the parameters Foca will use for the SWIM
/// protocol.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Config {
    /// Specifies how often a random member will be probed for activity.
    ///
    /// At the end of this period, if the member didn't reply (directly
    /// or indirectly, see [`crate::Message`]) it's declared
    /// [`crate::State::Suspect`].
    ///
    /// Should be strictly larger than [`Self::probe_rtt`]. Preferably more
    /// than twice its value since we need to wait for the indirect ping cycle.
    /// If unsure, err on the safe side with `probe_rtt * 3` and tune
    /// later.
    ///
    /// Must not be zero.
    pub probe_period: Duration,

    /// How long to wait for a direct reply to a probe before starting
    /// the indirect probing cycle.
    ///
    /// It should be set to a value that describes well your transport
    /// round-trip time. A reasonable value would be a high quantile
    /// (p99, for example) of your cluster-wide `ICMP PING` RTT.
    ///
    /// Must be strictly smaller than [`Self::probe_period`].
    ///
    /// Must not be zero.
    pub probe_rtt: Duration,

    /// How many members will be asked to perform an indirect ping
    /// in case the probed member takes too long to reply.
    ///
    /// This doesn't need to be a large number: we're essentially
    /// fanning out to ensure a message actually reaches the original
    /// ping target in case of poor transmission quality or weird
    /// partitions.
    ///
    /// Setting this to 3-5 should be more than enough for a "modern"
    /// network.
    pub num_indirect_probes: NonZeroUsize,

    /// Specifies how many times a single update/broadcast will be sent
    /// along with a normal message.
    ///
    /// A high value trades off bandwidth for higher chances of fully
    /// disseminating broadcasts throughout the cluster.
    ///
    /// Reasonable values range from 5, for small clusters to 15 for
    /// *very* large clusters.
    pub max_transmissions: NonZeroU8,

    /// How long a Suspect member is considered active before being
    /// declared Down.
    ///
    /// Here you want to give time for the member to realize it has
    /// been declared Suspect and notify the cluster that its actually
    /// active.
    ///
    /// Higher values give more time for a member to recover from a
    /// false suspicion, but slows down detection of a failed state.
    ///
    /// Very application-dependent. Smaller clusters likely want
    /// this value to be a small multiplier of [`Self::probe_period`]
    /// whereas large clusters can easily tolerate several seconds of
    /// wait.
    ///
    /// Must not be zero.
    pub suspect_to_down_after: Duration,

    /// Governs how long Foca will remember an identity as being
    /// Down.
    ///
    /// A high value is recommended to avoid confusing cluster
    /// members with partial joins. If in doubt use a high multiplier
    /// over the probe period, like `10 * probe_period`.
    pub remove_down_after: Duration,

    /// The maximum packet size Foca will produce AND consume.
    ///
    /// This is transport-dependent. The main goal is reducing
    /// fragmentation and congestion.
    ///
    /// If using UDP as a transport, use `rfc8085` guidelines and stick
    /// to a value smaller than your network's MTU. 1400 is a good
    /// value for a non-ancient network.
    pub max_packet_size: NonZeroUsize,
}

impl Config {
    /// A simple configuration that should work well in a LAN scenario.
    pub fn simple() -> Self {
        Self {
            probe_period: Duration::from_millis(1500),
            probe_rtt: Duration::from_millis(500),
            // `expect` documents the invariant: these literals are
            // all non-zero, so construction can never fail
            num_indirect_probes: NonZeroUsize::new(3).expect("3 is non-zero"),

            max_transmissions: NonZeroU8::new(10).expect("10 is non-zero"),

            suspect_to_down_after: Duration::from_secs(3),
            remove_down_after: Duration::from_secs(15),

            max_packet_size: NonZeroUsize::new(1400).expect("1400 is non-zero"),
        }
    }
}
Created src/error.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use core::fmt;
+
+#[derive(Debug)]
+/// This type represents all possible errors operating a Foca instance.
+///
+/// Each variant's documentation states whether it may occur during
+/// normal operation and whether it can leave the instance in an
+/// inconsistent state.
+pub enum Error {
+    /// Emitted whenever Foca receives a byte slice larger than
+    /// the configured limit ([`crate::Config::max_packet_size`]).
+    ///
+    /// Doesn't affect Foca's state.
+    DataTooBig,
+
+    /// Attempt to [`crate::Foca::reuse_down_identity`] when not needed.
+    ///
+    /// Doesn't affect Foca's state.
+    NotUndead,
+
+    /// Attempt to [`crate::Foca::change_identity`] with the same identity.
+    ///
+    /// Doesn't affect Foca's state.
+    SameIdentity,
+
+    /// Expected to be connected when reaching this point.
+    /// Sentinel error to detect integration bugs.
+    ///
+    /// Must not happen under normal circumstances.
+    NotConnected,
+
+    /// Reached the end of the probe cycle but expected
+    /// steps didn't happen. Bug in the runtime/scheduling
+    /// mechanism most likely.
+    ///
+    /// Must not happen under normal circumstances.
+    IncompleteProbeCycle,
+
+    /// Received data where the sender has the same
+    /// id as ourselves.
+    ///
+    /// There's likely a member submitting wrong/manually-crafted
+    /// packets.
+    DataFromOurselves,
+
+    /// Data contains a message supposed to reach us via indirect
+    /// means.
+    ///
+    /// There's likely a member submitting wrong/manually-crafted
+    /// packets.
+    IndirectForOurselves,
+
+    /// Data is in an unexpected format.
+    ///
+    /// Doesn't affect Foca's state.
+    MalformedPacket,
+
+    /// Wraps [`crate::Codec`]'s `encode_*` failures.
+    /// Shouldn't happen under normal circumstances unless using a broken
+    /// codec.
+    ///
+    /// Might have left Foca in an inconsistent state.
+    Encode(anyhow::Error),
+
+    /// Wraps [`crate::Codec`]'s `decode_*` failures.
+    ///
+    /// Can happen during normal operation when receiving junk data.
+    Decode(anyhow::Error),
+
+    /// Wraps [`crate::BroadcastHandler`] failures.
+    ///
+    /// Doesn't affect Foca's state.
+    CustomBroadcast(anyhow::Error),
+}
+
+// Hand-rolled because `anyhow::Error` doesn't implement PartialEq;
+// wrapped variants fall back to comparing rendered messages.
+impl PartialEq for Error {
+    fn eq(&self, other: &Self) -> bool {
+        use alloc::string::ToString;
+        match (self, other) {
+            // Wrapped errors have to allocate to compare :(
+            // But PartialEq on an error type is mostly useful for tests
+            (Error::Encode(a), Error::Encode(b)) => a.to_string().eq(&b.to_string()),
+            (Error::Decode(a), Error::Decode(b)) => a.to_string().eq(&b.to_string()),
+            (Error::CustomBroadcast(a), Error::CustomBroadcast(b)) => {
+                a.to_string().eq(&b.to_string())
+            }
+
+            (Error::DataTooBig, Error::DataTooBig) => true,
+            (Error::NotConnected, Error::NotConnected) => true,
+            (Error::NotUndead, Error::NotUndead) => true,
+            (Error::SameIdentity, Error::SameIdentity) => true,
+            (Error::IncompleteProbeCycle, Error::IncompleteProbeCycle) => true,
+            (Error::DataFromOurselves, Error::DataFromOurselves) => true,
+            (Error::IndirectForOurselves, Error::IndirectForOurselves) => true,
+            (Error::MalformedPacket, Error::MalformedPacket) => true,
+
+            // Instead of a catch-all here, we explicitly enumerate our variants
+            // so that when/if new errors are added we don't silently introduce
+            // a bug
+            (Error::Encode(_), _) => false,
+            (Error::Decode(_), _) => false,
+            (Error::CustomBroadcast(_), _) => false,
+            (Error::DataTooBig, _) => false,
+            (Error::NotConnected, _) => false,
+            (Error::NotUndead, _) => false,
+            (Error::SameIdentity, _) => false,
+            (Error::IncompleteProbeCycle, _) => false,
+            (Error::DataFromOurselves, _) => false,
+            (Error::IndirectForOurselves, _) => false,
+            (Error::MalformedPacket, _) => false,
+        }
+    }
+}
+
+// Human-readable renderings. Wrapped codec/broadcast errors delegate
+// to their own Display implementations.
+impl fmt::Display for Error {
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Error::DataTooBig => {
+                formatter.write_str("Received data larger than maximum configured limit")
+            }
+            Error::NotUndead => formatter.write_str("Useless attempt to reuse a functioning Foca"),
+            Error::SameIdentity => {
+                formatter.write_str("New identity is the same as the current one")
+            }
+            Error::NotConnected => formatter.write_str("BUG! Expected to be connected, but wasn't"),
+            Error::IncompleteProbeCycle => {
+                formatter.write_str("BUG! Probe cycle finished without running its full course")
+            }
+            Error::DataFromOurselves => formatter.write_str(concat!(
+                "Received data from something claiming to have ",
+                "an identity equal to our own"
+            )),
+            Error::IndirectForOurselves => formatter.write_str(concat!(
+                "Received message that was supposed to reach us only ",
+                "via indirect means"
+            )),
+            Error::MalformedPacket => formatter.write_str("Payload with more data than expected"),
+            Error::Encode(err) => err.fmt(formatter),
+            Error::Decode(err) => err.fmt(formatter),
+            Error::CustomBroadcast(err) => err.fmt(formatter),
+        }
+    }
+}
+
+// `std::error::Error` has no required methods; the Debug and Display
+// impls above satisfy its bounds. Gated behind `std` since this is a
+// no_std crate by default.
+#[cfg(feature = "std")]
+impl std::error::Error for Error {}
Created src/identity.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use core::fmt;
+
+/// Identity is a cluster-global identifier. It qualifies a cluster
+/// member and there must not be multiple members sharing the
+/// same identity.
+///
+/// When talking about network protocols we're often talking about
+/// an IP-address paired with a port number (`std::net::SocketAddr`,
+/// for example), but Foca doesn't actually care about what's inside
+/// an identity so long as it's unique.
+///
+/// This allows implementations to make their identities as lean or
+/// large as they need. For example: if every Foca instance will bind
+/// to the same port, there's no need to make the port number part of
+/// the identity.
+///
+/// That said, most of the time it's useful to have *more* information
+/// in an identity than just a way to figure out a "network" address.
+/// And that's because of how SWIM works: when an identity is declared
+/// down or deliberately leaves the cluster, it cannot rejoin for a
+/// relatively long while, so a little extra metadata allows us to
+/// come back as fast as possible.
+///
+/// See `examples/identity_golf.rs` for ideas
+///
+pub trait Identity: Clone + Eq + fmt::Debug {
+    /// Opt-in on auto-rejoining by providing a new identity.
+    ///
+    /// When Foca detects it's been declared Down by another member
+    /// of the cluster, it will call [`Self::renew()`] on its current
+    /// identity and if it yields a new one will immediately
+    /// switch to it and notify the cluster so that downtime is
+    /// minimized.
+    fn renew(&self) -> Option<Self>;
+
+    /// Optionally accept Announce messages addressed to an identity
+    /// that isn't exactly the same as ours.
+    ///
+    /// Foca discards messages that aren't addressed to its exact
+    /// identity. This means that if your identity has an unpredictable
+    /// field (a UUID or a random number, for example), nobody will
+    /// be able to join with us directly.
+    ///
+    /// The [`Self::has_same_prefix`] method is how we teach Foca to
+    /// relax this restriction: Upon receiving an Announce message it
+    /// will call `current_id.has_same_prefix(sender_id)` and if it
+    /// yields `true` the message will be accepted and the new member
+    /// will be allowed to join the cluster.
+    fn has_same_prefix(&self, other: &Self) -> bool;
+}
+
+// Helper macro: implements `Identity` for plain address-like types
+// that never renew (no auto-rejoin) and never prefix-match.
+#[cfg(feature = "std")]
+macro_rules! impl_basic_identity {
+    ($type: ty) => {
+        impl Identity for $type {
+            fn renew(&self) -> Option<Self> {
+                None
+            }
+
+            fn has_same_prefix(&self, _other: &Self) -> bool {
+                false
+            }
+        }
+    };
+}
+
+#[cfg(feature = "std")]
+impl_basic_identity!(std::net::SocketAddr);
+
+#[cfg(feature = "std")]
+impl_basic_identity!(std::net::SocketAddrV6);
+
+#[cfg(feature = "std")]
+impl_basic_identity!(std::net::SocketAddrV4);
Created src/lib.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+//! Foca is a building block for your gossip-based cluster discovery. It's
+//! a small library-first crate that implements the SWIM protocol along
+//! with its useful extensions (`SWIM+Inf.+Susp.`).
+//!
+//! * It's a `no_std` + `alloc` crate by default. There's an optional
+//! `std` feature that simply brings compatibility with some types
+//! and the `std::error::Error` trait
+//!
+//! * Bring Your Own Everything: Foca doesn't care about anything that
+//! isn't part of the cluster membership functionality:
+//!
+//! * Pluggable, renewable identities: Using a fixed port number?
+//! No need to send it all the time. Want to attach extra crucial
+//! information (shard id, deployment version, etc)? Easy.
+//! Always have a lookup table mapping `u16` to hostnames? Use
+//! that instead of a socket address! Bring your own type,
+//! implement [`Identity`] and enjoy.
+//!
+//! * Write your own wire format by implementing [`Codec`]; Like
+//! serde? There is `bincode-codec` and `postcard-codec` features,
+//! or just use the `serde` feature and pick your favorite format.
+//!
+//! * Use any transport you want, it's up to you how messages
+//! reach each member: Foca will tell you "Send these bytes to
+//! member M", how that happens is not its business.
+//!
+//! * Custom Broadcasts: Foca can attach arbitrary data to its messages
+//! and disseminate them the same way it distributes cluster updates.
+//! Send CRDT operations, take a stab at implementing metadata-heavy
+//! service discovery system, anything really. Give it something
+//! that implements [`BroadcastHandler`] and Foca will ship it.
+//!
+//! * No runtime crashes: Apart from `alloc`-related aborts, Foca should
+//! only crash inside something you provided: a [`Codec`], [`Runtime`]
+//! or a [`BroadcastHandler`]- so long as those are solid, Foca is too.
+//!
+//! * Doesn't force you to choose between `sync` and `async`. It's as
+//! easy to plug it in an evented runtime as it is to go old-school.
+//!
+#![forbid(unsafe_code)]
+#![no_std]
+#![deny(missing_docs)]
+#![deny(rustdoc::broken_intra_doc_links)]
+
+extern crate alloc;
+use alloc::vec::Vec;
+
+#[cfg(feature = "std")]
+extern crate std;
+
+use core::{cmp::Ordering, convert::TryFrom, fmt, mem};
+
+use bytes::{Buf, BufMut, Bytes, BytesMut};
+use rand::Rng;
+
+mod broadcast;
+mod codec;
+mod config;
+mod error;
+mod identity;
+mod member;
+mod payload;
+mod probe;
+mod runtime;
+#[cfg(test)]
+mod testing;
+
+use crate::{
+ broadcast::Broadcasts,
+ member::{ApplySummary, Members},
+ probe::Probe,
+};
+
+pub use crate::{
+ broadcast::{BroadcastHandler, Invalidates},
+ codec::Codec,
+ config::Config,
+ error::Error,
+ identity::Identity,
+ member::{Incarnation, Member, State},
+ payload::{Header, Message, ProbeNumber},
+ runtime::{Notification, Runtime, Timer, TimerToken},
+};
+
+#[cfg(feature = "postcard-codec")]
+pub use crate::codec::postcard_impl::PostcardCodec;
+
+#[cfg(feature = "bincode-codec")]
+pub use crate::codec::bincode_impl::BincodeCodec;
+
+// Crate-internal shorthand: every fallible operation uses [`Error`]
+type Result<T> = core::result::Result<T, Error>;
+
+/// Foca is the main interaction point of this crate.
+///
+/// It manages the cluster members and executes the SWIM protocol. It's
+/// intended as a low-level guts-exposed safe view into the protocol
+/// allowing any kind of Identity and transport to be used.
+///
+/// Most interactions with Foca require the caller to provide a
+/// [`Runtime`] type, which is simply a way to turn the result of an
+/// operation inside out (think callbacks, or an out parameter like
+/// `void* out`). This allows Foca to avoid deciding anything related
+/// to how it interacts with the operating system.
+pub struct Foca<T, C, RNG, B: BroadcastHandler> {
+    // This member's own identity within the cluster
+    identity: T,
+    // Wire format implementation
+    codec: C,
+    // Randomness source, used when choosing members to message
+    rng: RNG,
+
+    incarnation: Incarnation,
+    config: Config,
+    connection_state: ConnectionState,
+    // Guards against stale timer events — see the `Timer::*`
+    // handling in `handle_timer`
+    timer_token: TimerToken,
+
+    members: Members<T>,
+    probe: Probe<T>,
+
+    // Used to buffer up members/updates when receiving and
+    // sending data
+    member_buf: Vec<Member<T>>,
+
+    // Since we emit data via `Runtime::send_to`, this could
+    // easily be a Vec, but `BytesMut::limit` is quite handy
+    send_buf: BytesMut,
+
+    // Holds (serialized) cluster updates, which may live for a
+    // while until they get disseminated `Config::max_transmissions`
+    // times or replaced by fresher updates.
+    updates_buf: BytesMut,
+    updates: Broadcasts<ClusterUpdate<T>>,
+
+    broadcast_handler: B,
+    custom_broadcasts: Broadcasts<B::Broadcast>,
+}
+
+// Convenience constructor for the common no-custom-broadcast case
+impl<T, C, RNG> Foca<T, C, RNG, NoCustomBroadcast>
+where
+    T: Identity,
+    C: Codec<T>,
+    RNG: Rng,
+{
+    /// Create a new Foca instance with custom broadcasts disabled.
+    ///
+    /// This is a simple shortcut for [`Foca::with_custom_broadcast`]
+    /// using the [`NoCustomBroadcast`] type to deny any form of custom
+    /// broadcast.
+    pub fn new(identity: T, config: Config, rng: RNG, codec: C) -> Self {
+        Self::with_custom_broadcast(identity, config, rng, codec, NoCustomBroadcast)
+    }
+}
+
+// Deliberately terse Debug, only needed when the `tracing::instrument`
+// attributes want to format `self`
+#[cfg(feature = "tracing")]
+impl<T: Identity, C, RNG, B: BroadcastHandler> fmt::Debug for Foca<T, C, RNG, B> {
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Assuming that when tracing comes into play the cluster is actually
+        // uniform. Meaning: everything is configured the same, including
+        // codec and broadcast handler.
+        // So the actually interesting thing is the identity.
+        formatter.debug_tuple("Foca").field(&self.identity).finish()
+    }
+}
+
+// XXX Does it make sense to have different associated type restrictions
+// based on a feature flag? Say: when using `std` we would enforce
+// that `Codec::Error` and `BroadcastHandler::Error` both implement
+// `std::error::Error`, thus instead of wrapping these errors via
+// `anyhow::Error::msg` we can use `anyhow::Error::new`.
+impl<T, C, RNG, B> Foca<T, C, RNG, B>
+where
+ T: Identity,
+ C: Codec<T>,
+ RNG: Rng,
+ B: BroadcastHandler,
+{
+    /// Initialize a new Foca instance.
+    pub fn with_custom_broadcast(
+        identity: T,
+        config: Config,
+        rng: RNG,
+        codec: C,
+        broadcast_handler: B,
+    ) -> Self {
+        // Pre-size scratch space from the config so steady-state
+        // operation avoids reallocating
+        let max_indirect_probes = config.num_indirect_probes.get();
+        let max_tx = config.max_transmissions.get().into();
+        let max_bytes = config.max_packet_size.get();
+        Self {
+            identity,
+            config,
+            rng,
+            codec,
+            incarnation: Incarnation::default(),
+            timer_token: TimerToken::default(),
+            members: Members::new(Vec::new()),
+            probe: Probe::new(Vec::with_capacity(max_indirect_probes)),
+            member_buf: Vec::new(),
+            connection_state: ConnectionState::Disconnected,
+            updates: Broadcasts::new(max_tx),
+            send_buf: BytesMut::with_capacity(max_bytes),
+            custom_broadcasts: Broadcasts::new(max_tx),
+            updates_buf: BytesMut::new(),
+            broadcast_handler,
+        }
+    }
+
+    /// Getter for the current identity.
+    ///
+    /// Read-only: use [`Foca::change_identity`] to replace it.
+    pub fn identity(&self) -> &T {
+        &self.identity
+    }
+
+    /// Re-enable joining a cluster with the same identity after being
+    /// declared Down.
+    ///
+    /// This is intended to be used by implementations that decide not to
+    /// opt-in on auto-rejoining: once Foca detects it's Down you'll
+    /// only be able to receive messages (which will likely stop after
+    /// a short while since the cluster thinks you are down).
+    ///
+    /// Whatever is controlling the running Foca will then have to wait
+    /// for at least [`Config::remove_down_after`] before attempting a
+    /// rejoin. Then you can call this method followed by a
+    /// [`Foca::announce(T)`] to go back to the cluster.
+    #[cfg_attr(feature = "tracing", tracing::instrument)]
+    pub fn reuse_down_identity(&mut self) -> Result<()> {
+        if self.connection_state != ConnectionState::Undead {
+            Err(Error::NotUndead)
+        } else {
+            // Back to square one: Disconnected, fresh incarnation
+            self.reset();
+            Ok(())
+        }
+    }
+
+    /// Change the current identity.
+    ///
+    /// Foca will declare its previous identity as Down and immediately
+    /// notify the cluster about the changes.
+    ///
+    /// Notice that changing your identity does not guarantee a
+    /// successful (re)join. After changing it and disseminating the updates
+    /// Foca will only know it's actually accepted after receiving a
+    /// message addressed to it.
+    ///
+    /// Watch for [`Notification::Active`] if you want more confidence about
+    /// a successful (re)join.
+    ///
+    /// Intended to be used when identities carry metadata that occasionally
+    /// changes.
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(runtime)))]
+    pub fn change_identity(&mut self, new_id: T, runtime: impl Runtime<T>) -> Result<()> {
+        if self.identity == new_id {
+            Err(Error::SameIdentity)
+        } else {
+            let previous_is_down = self.connection_state == ConnectionState::Undead;
+            let previous_id = mem::replace(&mut self.identity, new_id);
+
+            self.reset();
+
+            // If our previous identity wasn't known as Down already,
+            // we'll declare it ourselves
+            if !previous_is_down {
+                let data = self.serialize_member(Member::down(previous_id.clone()))?;
+                self.updates.add_or_replace(ClusterUpdate {
+                    member_id: previous_id,
+                    data,
+                });
+            }
+
+            // Spread the news right away instead of waiting for the
+            // next protocol round
+            self.gossip(runtime)?;
+
+            Ok(())
+        }
+    }
+
+    /// Iterate over the currently active cluster members.
+    ///
+    /// NOTE(review): "active" is whatever `Members::iter_active`
+    /// yields — presumably non-Down members; confirm in `member.rs`.
+    pub fn iter_members(&self) -> impl Iterator<Item = &T> {
+        self.members.iter_active().map(|member| member.id())
+    }
+
+    /// Returns the number of active members in the cluster.
+    ///
+    /// May only be used as a bound for [`Foca::iter_members`] if no
+    /// Foca method that takes `&mut self` is called in-between
+    /// (mutating operations may change the member list).
+    pub fn num_members(&self) -> usize {
+        self.members.num_active()
+    }
+
+    /// Applies cluster updates to this foca instance.
+    ///
+    /// This is for advanced usage. It's intended as a way to unlock
+    /// more elaborate synchronization protocols: implementations may
+    /// choose to unify their cluster knowledge (say: a streaming
+    /// join protocol or a periodic sync) and use [`Foca::apply_many`]
+    /// as a way to feed Foca this new (external) knowledge.
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(updates, runtime)))]
+    pub fn apply_many(
+        &mut self,
+        updates: impl Iterator<Item = Member<T>>,
+        mut runtime: impl Runtime<T>,
+    ) -> Result<()> {
+        for update in updates {
+            if update.id() == &self.identity {
+                // News about our own identity takes a dedicated path
+                self.handle_self_update(update.incarnation(), update.state(), &mut runtime)?;
+            } else {
+                self.apply_update(update, &mut runtime)?;
+            }
+        }
+
+        // Applying updates may have changed the number of active
+        // members, so our connection state might need to flip
+        self.adjust_connection_state(runtime);
+
+        Ok(())
+    }
+
+    // Flips between Connected/Disconnected based on whether there is
+    // at least one active member. Undead is terminal here.
+    fn adjust_connection_state(&mut self, runtime: impl Runtime<T>) {
+        match self.connection_state {
+            ConnectionState::Disconnected => {
+                // Learned about the first active member: we're online
+                if self.members.num_active() > 0 {
+                    self.become_connected(runtime);
+                }
+            }
+            ConnectionState::Connected => {
+                // Lost the last active member: we're offline
+                if self.members.num_active() == 0 {
+                    self.become_disconnected(runtime);
+                }
+            }
+            ConnectionState::Undead => {
+                // We're undead. The only ways to recover are via
+                // an id change or reuse_down_identity(). Nothing else
+                // to do
+            }
+        }
+    }
+
+    /// Attempt to join the cluster `dst` belongs to.
+    ///
+    /// Sends a [`Message::Announce`] to `dst`. If accepted, we'll receive
+    /// a [`Message::Feed`] as reply.
+    ///
+    /// Announce messages carry no payload: a receiver treats any
+    /// attached data as a malformed packet.
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(runtime)))]
+    pub fn announce(&mut self, dst: T, runtime: impl Runtime<T>) -> Result<()> {
+        self.send_message(dst, Message::Announce, runtime)
+    }
+
+    /// Disseminate updates/broadcasts to cluster members.
+    ///
+    /// This instructs Foca to pick [`Config::num_indirect_probes`]
+    /// random active members and send a `Message::Gossip` containing
+    /// cluster updates.
+    ///
+    /// Intended for more complex scenarios where an implementation wants
+    /// to attempt reducing the time it takes for information to
+    /// propagate thoroughly.
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(runtime)))]
+    pub fn gossip(&mut self, mut runtime: impl Runtime<T>) -> Result<()> {
+        // member_buf is shared scratch space; the loop below fully
+        // drains it
+        self.member_buf.clear();
+        self.members.choose_active_members(
+            self.config.num_indirect_probes.get(),
+            &mut self.member_buf,
+            &mut self.rng,
+            |_| true,
+        );
+
+        while let Some(chosen) = self.member_buf.pop() {
+            self.send_message(chosen.into_identity(), Message::Gossip, &mut runtime)?;
+        }
+
+        Ok(())
+    }
+
+    /// Leave the cluster by declaring our own identity as down.
+    ///
+    /// If there are active members, a few are selected and notified
+    /// of our exit so that the cluster learns about it quickly.
+    ///
+    /// This is the cleanest way to terminate a running Foca. Notice
+    /// that it takes `self` by value: the instance is consumed.
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(runtime)))]
+    pub fn leave_cluster(mut self, mut runtime: impl Runtime<T>) -> Result<()> {
+        let data = self.serialize_member(Member::down(self.identity().clone()))?;
+        self.updates.add_or_replace(ClusterUpdate {
+            member_id: self.identity().clone(),
+            data,
+        });
+
+        self.gossip(&mut runtime)?;
+
+        // We could try to be smart here and only go defunct if there
+        // are active members, but I'd rather have consistent behaviour.
+        self.become_undead(&mut runtime);
+
+        Ok(())
+    }
+
+    /// Register some data to be broadcast along with Foca messages.
+    ///
+    /// Calls into this instance's BroadcastHandler and reacts accordingly.
+    ///
+    /// # Errors
+    ///
+    /// Yields [`Error::DataTooBig`] when `data` exceeds
+    /// [`Config::max_packet_size`] and [`Error::CustomBroadcast`] when
+    /// the handler rejects the payload.
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(data)))]
+    pub fn add_broadcast(&mut self, data: &[u8]) -> Result<()> {
+        // NOTE: Receiving B::Broadcast instead of a byte slice would make it
+        //       look more convenient, however it gets in the way when
+        //       implementing more ergonomic interfaces (say: an async driver)
+        //       it forces everything to know the exact concrete type of
+        //       the broadcast. So... maybe revisit this decision later?
+        #[cfg(feature = "tracing")]
+        tracing::info!(len = data.len());
+
+        // Not considering the whole header
+        if data.len() > self.config.max_packet_size.get() {
+            return Err(Error::DataTooBig);
+        }
+
+        // The handler may decide the data is uninteresting (None)
+        if let Some(broadcast) = self
+            .broadcast_handler
+            .receive_item(data)
+            .map_err(anyhow::Error::msg)
+            .map_err(Error::CustomBroadcast)?
+        {
+            self.custom_broadcasts.add_or_replace(broadcast);
+        }
+
+        Ok(())
+    }
+
+    /// React to a previously scheduled timer event.
+    ///
+    /// See [`Runtime::submit_after`].
+    ///
+    /// Most events carry a [`TimerToken`]; when it doesn't match the
+    /// current one the event is stale and gets silently ignored.
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(runtime)))]
+    pub fn handle_timer(&mut self, event: Timer<T>, mut runtime: impl Runtime<T>) -> Result<()> {
+        match event {
+            Timer::SendIndirectProbe { probed_id, token } => {
+                // Internal assumption: token invalidation happens when
+                // going undead, which also clears the probing, so there
+                // is no chance for this return to trigger while we have
+                // a running probe unless something is crafting these
+                // manually
+                if token != self.timer_token {
+                    #[cfg(feature = "tracing")]
+                    tracing::warn!(?self.timer_token, token, "Bad timer token");
+                    return Ok(());
+                }
+
+                // Bookkeeping: This is how we verify that the probe code
+                // is running correctly. If we reach the end of the
+                // probe and this hasn't happened, we know something is
+                // wrong.
+                self.probe.mark_indirect_probe_stage_reached();
+
+                if !self.probe.is_probing(&probed_id) {
+                    #[cfg(feature = "tracing")]
+                    tracing::warn!(?probed_id, "Member not being probed");
+                    return Ok(());
+                }
+
+                if self.probe.succeeded() {
+                    // We received an Ack already, nothing else to do
+                    return Ok(());
+                }
+
+                // No direct Ack in time: ask a few other members to
+                // ping the target on our behalf
+                self.member_buf.clear();
+                self.members.choose_active_members(
+                    self.config.num_indirect_probes.get(),
+                    &mut self.member_buf,
+                    &mut self.rng,
+                    |candidate| Some(candidate) != self.probe.target(),
+                );
+
+                while let Some(chosen) = self.member_buf.pop() {
+                    let indirect = chosen.into_identity();
+
+                    self.probe.expect_indirect_ack(indirect.clone());
+
+                    self.send_message(
+                        indirect,
+                        Message::PingReq {
+                            target: probed_id.clone(),
+                            probe_number: self.probe.probe_number(),
+                        },
+                        &mut runtime,
+                    )?;
+                }
+
+                Ok(())
+            }
+            Timer::ChangeSuspectToDown {
+                member_id,
+                incarnation,
+                token,
+            } => {
+                if self.timer_token == token {
+                    let as_down = Member::new(member_id, incarnation, State::Down);
+                    if let Some(summary) = self
+                        .members
+                        // Down is terminal, so before doing that we ensure the member
+                        // is still under suspicion.
+                        // Checking only incarnation is sufficient because to refute
+                        // suspicion the member must increment its own incarnation
+                        .apply_existing_if(as_down.clone(), |member| {
+                            member.incarnation() == incarnation
+                        })
+                    {
+                        self.handle_apply_summary(&summary, as_down, &mut runtime)?;
+                        // Member went down we might need to adjust our internal state
+                        self.adjust_connection_state(runtime);
+                    } else {
+                        #[cfg(feature = "tracing")]
+                        tracing::warn!(member = ?as_down.id(), "Member not found");
+                    }
+                }
+
+                Ok(())
+            }
+            Timer::RemoveDown(down) => {
+                // Reaping: forget the member entirely so its identity
+                // may rejoin later
+                #[cfg_attr(
+                    not(feature = "tracing"),
+                    allow(unused_variables, clippy::if_same_then_else)
+                )]
+                if let Some(removed) = self.members.remove_if_down(&down) {
+                    #[cfg(feature = "tracing")]
+                    tracing::trace!(?removed);
+                } else {
+                    #[cfg(feature = "tracing")]
+                    tracing::trace!(?down, "Member not found / not down");
+                }
+
+                Ok(())
+            }
+            Timer::ProbeRandomMember(token) => {
+                if token == self.timer_token {
+                    if self.connection_state != ConnectionState::Connected {
+                        // Not expected to happen during normal operation, but
+                        // may reach here via manually crafted Timer::
+                        Err(Error::NotConnected)
+                    } else {
+                        self.probe_random_member(runtime)
+                    }
+                } else {
+                    // Invalid token, may happen whenever we go offline after
+                    // being online
+                    Ok(())
+                }
+            }
+        }
+    }
+
+    /// Reports the current length of the cluster updates queue.
+    ///
+    /// Updates are transmitted [`Config::max_transmissions`] times
+    /// at most or until we learn new information about the same
+    /// member.
+    ///
+    /// See also [`Foca::custom_broadcast_backlog`].
+    pub fn updates_backlog(&self) -> usize {
+        self.updates.len()
+    }
+
+    /// Reports the current length of the custom broadcast queue.
+    ///
+    /// Custom broadcasts are transmitted [`Config::max_transmissions`]
+    /// times at most or until they get invalidated by another custom
+    /// broadcast.
+    pub fn custom_broadcast_backlog(&self) -> usize {
+        self.custom_broadcasts.len()
+    }
+
+    /// Handle data received from the network.
+    ///
+    /// Data larger than the configured limit will be rejected. Errors are
+    /// expected if you're receiving arbitrary data (which is very likely
+    /// if you are listening to a socket address).
+    #[cfg_attr(feature = "tracing", tracing::instrument(skip(runtime, data)))]
+    pub fn handle_data(&mut self, mut data: &[u8], mut runtime: impl Runtime<T>) -> Result<()> {
+        #[cfg(feature = "tracing")]
+        tracing::debug!(len = data.len());
+
+        if data.remaining() > self.config.max_packet_size.get() {
+            return Err(Error::DataTooBig);
+        }
+
+        let header = self
+            .codec
+            .decode_header(&mut data)
+            .map_err(anyhow::Error::msg)
+            .map_err(Error::Decode)?;
+
+        #[cfg(feature = "tracing")]
+        tracing::info!(?header.src, ?header.message);
+
+        let remaining = data.remaining();
+        // A single trailing byte or a Announce payload with _any_
+        // data is bad
+        if remaining == 1 || (header.message == Message::Announce && remaining > 0) {
+            return Err(Error::MalformedPacket);
+        }
+
+        if !self.accept_payload(&header) {
+            #[cfg(feature = "tracing")]
+            tracing::debug!("payload rejected");
+            return Ok(());
+        }
+
+        // We can skip this buffering if we assume that reaching here
+        // means the packet is valid. But that doesn't seem like a very
+        // good idea...
+        self.member_buf.clear();
+        if remaining >= 2 {
+            // Updates are prefixed with a u16 count
+            let num_updates = data.get_u16();
+            for _i in 0..num_updates {
+                self.member_buf.push(
+                    self.codec
+                        .decode_member(&mut data)
+                        .map_err(anyhow::Error::msg)
+                        .map_err(Error::Decode)?,
+                );
+            }
+        }
+
+        let Header {
+            src,
+            src_incarnation,
+            dst: _,
+            message,
+        } = header;
+
+        if src == self.identity {
+            return Err(Error::DataFromOurselves);
+        }
+
+        let sender_is_active = self
+            // It's a known member, so we ensure our knowledge about
+            // it is up-to-date (it is at _least_ alive, since it can
+            // talk)
+            .apply_update(
+                Member::new(src.clone(), src_incarnation, State::Alive),
+                &mut runtime,
+            )?;
+
+        // But dead members are ignored. At least until the member
+        // list gets reaped.
+        if !sender_is_active {
+            #[cfg(feature = "tracing")]
+            tracing::debug!("Discarded: Inactive sender");
+
+            return Ok(());
+        }
+
+        // Now that we know the member is active, we'll handle the
+        // updates, which may change our referential cluster
+        // representation and our own connection state.
+        //
+        // Here we take the Vec so we can drain it without upsetting
+        // the borrow checker. And then put it back in its place, so
+        // that we can keep reusing its already-allocated space.
+        let mut updates = mem::take(&mut self.member_buf);
+        self.apply_many(updates.drain(..), &mut runtime)?;
+        debug_assert!(
+            self.member_buf.is_empty(),
+            "member_buf modified while taken"
+        );
+        self.member_buf = updates;
+
+        // Right now there might still be some data left to read in the
+        // buffer (custom broadcasts).
+        // We choose to defer handling them until after we're done
+        // with the core of the protocol.
+
+        // If we're not connected (anymore), we can't react to a message
+        // So we just finish consuming the data
+        if self.connection_state != ConnectionState::Connected {
+            return self.handle_custom_broadcasts(data);
+        }
+
+        match message {
+            Message::Ping(probe_number) => {
+                self.send_message(src, Message::Ack(probe_number), runtime)?;
+            }
+            Message::Ack(probe_number) => {
+                #[cfg_attr(not(feature = "tracing"), allow(clippy::if_same_then_else))]
+                if self.probe.receive_ack(&src, probe_number) {
+                    #[cfg(feature = "tracing")]
+                    tracing::debug!("Probe success");
+                } else {
+                    // May be triggered by a member that slows down (say, you ^Z
+                    // the process and `fg` back after a while).
+                    // Might be interesting to keep an eye on.
+                    #[cfg(feature = "tracing")]
+                    tracing::warn!("Ack from unexpected member");
+                }
+            }
+            Message::PingReq {
+                target,
+                probe_number,
+            } => {
+                if target == self.identity {
+                    return Err(Error::IndirectForOurselves);
+                } else {
+                    self.send_message(
+                        target,
+                        Message::IndirectPing {
+                            origin: src,
+                            probe_number,
+                        },
+                        runtime,
+                    )?;
+                }
+            }
+            Message::IndirectPing {
+                origin,
+                probe_number,
+            } => {
+                if origin == self.identity {
+                    return Err(Error::IndirectForOurselves);
+                } else {
+                    self.send_message(
+                        src,
+                        Message::IndirectAck {
+                            target: origin,
+                            probe_number,
+                        },
+                        runtime,
+                    )?;
+                }
+            }
+            Message::IndirectAck {
+                target,
+                probe_number,
+            } => {
+                if target == self.identity {
+                    return Err(Error::IndirectForOurselves);
+                } else {
+                    self.send_message(
+                        target,
+                        Message::ForwardedAck {
+                            origin: src,
+                            probe_number,
+                        },
+                        runtime,
+                    )?;
+                }
+            }
+            Message::ForwardedAck {
+                origin,
+                probe_number,
+            } =>
+            {
+                #[cfg_attr(not(feature = "tracing"), allow(clippy::if_same_then_else))]
+                if origin == self.identity {
+                    return Err(Error::IndirectForOurselves);
+                } else if self.probe.receive_indirect_ack(&src, probe_number) {
+                    #[cfg(feature = "tracing")]
+                    tracing::debug!("Indirect probe success");
+                } else {
+                    #[cfg(feature = "tracing")]
+                    tracing::warn!("Unexpected ForwardedAck sender");
+                }
+            }
+            Message::Announce => self.send_message(src, Message::Feed, runtime)?,
+            // Nothing to do. Gossip and Feed messages come with cluster
+            // updates only and we've handled them above.
+            Message::Gossip | Message::Feed => {}
+        };
+
+        self.handle_custom_broadcasts(data)
+    }
+
+    // Encodes a single member update, reusing `updates_buf`'s
+    // allocation via split()/freeze()
+    fn serialize_member(&mut self, member: Member<T>) -> Result<Bytes> {
+        let mut buf = self.updates_buf.split();
+        self.codec
+            .encode_member(&member, &mut buf)
+            .map_err(anyhow::Error::msg)
+            .map_err(Error::Encode)?;
+
+        Ok(buf.freeze())
+    }
+
+    // Shared by `reuse_down_identity` and `change_identity`: back to
+    // Disconnected with a fresh incarnation counter
+    fn reset(&mut self) {
+        self.connection_state = ConnectionState::Disconnected;
+        self.incarnation = Incarnation::default();
+        // XXX It might make sense to `self.updates.clear()` if we're
+        //     down for a very long while, but we don't track instants
+        //     internally... Exposing a public method to do so and
+        //     letting drivers decide when to do it could be a way
+        //     out. But recreating Foca is quite cheap, so revisit
+        //     me maybe?
+    }
+
+    // Runs one full probe step: settles the fate of the previous
+    // target (suspecting it on failure), picks a fresh member to
+    // probe and schedules the follow-up timer events.
+    fn probe_random_member(&mut self, mut runtime: impl Runtime<T>) -> Result<()> {
+        if !self.probe.validate() {
+            return Err(Error::IncompleteProbeCycle);
+        }
+
+        if let Some(failed) = self.probe.take_failed() {
+            // Applying here can fail if:
+            //
+            // 1. The member increased its incarnation since the probe started
+            //    (as a side effect of someone else probing and suspecting it)
+            //
+            // 2. The member was ALREADY suspect when we picked it for probing
+            //
+            // 3. The member is now Down, either by leaving voluntarily or by
+            //    being declared down by another cluster member
+            //
+            // 4. The member doesn't exist anymore, which shouldn't actually
+            //    happen...?
+            let as_suspect = Member::new(failed.id().clone(), failed.incarnation(), State::Suspect);
+            if let Some(summary) = self
+                .members
+                .apply_existing_if(as_suspect.clone(), |_member| true)
+            {
+                self.handle_apply_summary(&summary, as_suspect, &mut runtime)?;
+
+                // Now we ensure we change the member to Down if it
+                // isn't already inactive
+                if summary.is_active_now {
+                    runtime.submit_after(
+                        Timer::ChangeSuspectToDown {
+                            member_id: failed.id().clone(),
+                            incarnation: failed.incarnation(),
+                            token: self.timer_token,
+                        },
+                        self.config.suspect_to_down_after,
+                    );
+                }
+            } else {
+                #[cfg(feature = "tracing")]
+                tracing::error!(
+                    failed = ?failed.id(),
+                    "Member failed probe but doesn't exist"
+                );
+            }
+        }
+
+        if let Some(member) = self.members.next(&mut self.rng) {
+            let member_id = member.id().clone();
+
+            let probe_number = self.probe.start(member.clone());
+
+            self.send_message(member_id.clone(), Message::Ping(probe_number), &mut runtime)?;
+
+            // If no Ack arrives within probe_rtt, the indirect probe
+            // stage kicks in
+            runtime.submit_after(
+                Timer::SendIndirectProbe {
+                    probed_id: member_id,
+                    token: self.timer_token,
+                },
+                self.config.probe_rtt,
+            );
+        } else {
+            // Should never happen... Reaching here is gated by being
+            // online, which requires having at least one active member
+            #[cfg(feature = "tracing")]
+            tracing::error!("Expected to find an active member to probe");
+        }
+
+        // Keep the protocol ticking: schedule the next probe cycle
+        runtime.submit_after(
+            Timer::ProbeRandomMember(self.timer_token),
+            self.config.probe_period,
+        );
+
+        Ok(())
+    }
+
    // shortcut for apply + handle
    /// Applies a single cluster update and processes its side effects
    /// (broadcast scheduling, notifications).
    ///
    /// Returns whether the member is active after the update. Must not
    /// be called with an update about our own identity.
    fn apply_update(&mut self, update: Member<T>, runtime: impl Runtime<T>) -> Result<bool> {
        debug_assert_ne!(&self.identity, update.id());
        let summary = self.members.apply(update.clone(), &mut self.rng);
        self.handle_apply_summary(&summary, update, runtime)?;

        Ok(summary.is_active_now)
    }
+
    /// Processes the outcome of applying `update` to the member list:
    /// schedules dissemination when the update changed cluster state,
    /// arranges removal of newly-Down members, and emits MemberUp /
    /// MemberDown notifications on active-set changes.
    fn handle_apply_summary(
        &mut self,
        summary: &ApplySummary,
        update: Member<T>,
        mut runtime: impl Runtime<T>,
    ) -> Result<()> {
        let id = update.id().clone();

        if summary.apply_successful {
            // Cluster state changed, start broadcasting it
            let data = self.serialize_member(update)?;
            self.updates.add_or_replace(ClusterUpdate {
                member_id: id.clone(),
                data,
            });

            // Down is a terminal state, so set up a handler for removing
            // the member so that it may rejoin later
            if !summary.is_active_now {
                runtime.submit_after(Timer::RemoveDown(id.clone()), self.config.remove_down_after);
            }
        }

        if summary.changed_active_set {
            if summary.is_active_now {
                runtime.notify(Notification::MemberUp(id));
            } else {
                runtime.notify(Notification::MemberDown(id));
            }
        }

        Ok(())
    }
+
+ fn handle_custom_broadcasts(&mut self, mut data: impl Buf) -> Result<()> {
+ while data.has_remaining() {
+ if let Some(broadcast) = self
+ .broadcast_handler
+ .receive_item(&mut data)
+ .map_err(anyhow::Error::msg)
+ .map_err(Error::CustomBroadcast)?
+ {
+ self.custom_broadcasts.add_or_replace(broadcast);
+ }
+ }
+
+ Ok(())
+ }
+
    /// Transitions to the Disconnected state and notifies the runtime
    /// that we've gone idle. Only meaningful when the active member
    /// count just reached zero.
    fn become_disconnected(&mut self, mut runtime: impl Runtime<T>) {
        // We reached zero active members, so we're offline
        debug_assert_eq!(0, self.num_members());
        self.connection_state = ConnectionState::Disconnected;

        // Ignore every timer event we sent up until this point.
        // This is to stop the probe cycle and prevent members from
        // being switched the Down state since we have little
        // confidence about our own state at this point.
        self.timer_token = self.timer_token.wrapping_add(1);

        runtime.notify(Notification::Idle);
    }
+
    /// Transitions to the Undead state: the cluster declared us Down
    /// (or we hit Incarnation::MAX) and we couldn't rejoin, so we stop
    /// probing and wait for manual intervention.
    fn become_undead(&mut self, mut runtime: impl Runtime<T>) {
        self.connection_state = ConnectionState::Undead;

        // We're down, whatever we find out by probing is unreliable
        self.probe.clear();

        // Just like `become_disconnected`, we want to avoid
        // handling events that aren't relevant anymore.
        self.timer_token = self.timer_token.wrapping_add(1);

        runtime.notify(Notification::Defunct);
    }
+
    /// Transitions to the Connected state, kicking off the periodic
    /// probe cycle. Requires at least one active member.
    fn become_connected(&mut self, mut runtime: impl Runtime<T>) {
        debug_assert_ne!(0, self.num_members());
        self.connection_state = ConnectionState::Connected;

        // We have at least one active member, so we can start
        // probing
        runtime.submit_after(
            Timer::ProbeRandomMember(self.timer_token),
            self.config.probe_period,
        );

        runtime.notify(Notification::Active);
    }
+
    /// Encodes `message` (with a full header) into the send buffer,
    /// piggybacks cluster updates or active-member data where the
    /// message type allows it, then hands the packet to the runtime.
    ///
    /// The packet never exceeds `config.max_packet_size`; piggybacked
    /// items that don't fit are simply left out.
    fn send_message(
        &mut self,
        dst: T,
        message: Message<T>,
        mut runtime: impl Runtime<T>,
    ) -> Result<()> {
        #[cfg(feature = "tracing")]
        tracing::debug!(?dst, ?message, "send_message");

        let header = Header {
            src: self.identity.clone(),
            src_incarnation: self.incarnation,
            dst: dst.clone(),
            message,
        };

        // `limit` caps how much the piggybacking below may write
        let mut buf = self
            .send_buf
            .split()
            .limit(self.config.max_packet_size.get());

        self.codec
            .encode_header(&header, &mut buf)
            .map_err(anyhow::Error::msg)
            .map_err(Error::Encode)?;

        let (needs_piggyback, only_active_members) = match header.message {
            // Announce packets contain nothing but the header
            Message::Announce => (false, false),
            // Feed packets stuff active members at the tail
            Message::Feed => (true, true),
            // Every other message stuffs cluster updates
            _ => (true, false),
        };

        // If we're piggybacking data, we need at least 2 extra bytes
        // so that we can also encode the number of items we're stuffing
        // into this buffer
        // NOTE(review): `> 2` requires at least 3 free bytes while a
        // bare tally needs only 2 — presumably intentional, since a
        // tally with zero room for items is useless. TODO confirm.
        if needs_piggyback && buf.remaining_mut() > 2 {
            // Where we'll write the total number of items
            let tally_position = buf.get_ref().len();
            // We leave a zero here so that the buffer advances, then
            // we'll come back to `tally_position` and overwrite this
            // with the actual total
            buf.put_u16(0);

            let mut num_items = 0;

            if only_active_members {
                // Feed: stuff active members, skipping the recipient
                // (it obviously knows about itself)
                for member in self
                    .members
                    .iter_active()
                    .filter(|member| member.id() != &dst)
                {
                    // XXX It's not very difficult to lift this restriction:
                    //     This means that codecs MUST NOT leave the buffer
                    //     dirty on failure
                    if let Err(_ignored) = self.codec.encode_member(member, &mut buf) {
                        break;
                    }
                    num_items += 1;
                }
            } else {
                // u16::MAX guarantees that num_events can be safely
                // cast from usize to u16
                let num_updates = self.updates.fill(&mut buf, u16::MAX.into());
                num_items = u16::try_from(num_updates).expect("usize bound by u16::MAX");
            }

            // Seek back and write the correct number of items added
            buf.get_mut()[tally_position..].as_mut().put_u16(num_items);

            // Fill the remaining space in the buffer with custom
            // broadcasts, if any
            self.custom_broadcasts.fill(&mut buf, usize::MAX);
        }

        runtime.send_to(dst, &buf.into_inner());
        Ok(())
    }
+
    /// Decides whether a decoded packet is meant for this instance.
    fn accept_payload(&self, header: &Header<T>) -> bool {
        // Only accept payloads addressed to us
        header.dst == self.identity
            // Unless it's an Announce message
            || (header.message == Message::Announce
                // Then we accept it if DST is one of our _possible_
                // identities
                && self.identity.has_same_prefix(&header.dst))
    }
+
    /// Reacts to the cluster talking about our own identity.
    ///
    /// Suspicion about our current incarnation is refuted by bumping
    /// `self.incarnation` (every subsequent message then doubles as the
    /// refutal). A Down state — or running out of incarnations — can
    /// only be escaped by rejoining with a renewed identity; failing
    /// that, we become undead.
    fn handle_self_update(
        &mut self,
        incarnation: Incarnation,
        state: State,
        mut runtime: impl Runtime<T>,
    ) -> Result<()> {
        match state {
            State::Suspect => {
                match self.incarnation.cmp(&incarnation) {
                    // This can happen when a member received an update about
                    // someone else suspecting us but hasn't received our
                    // refutal yet. We can ignore it.
                    // There is a chance that it may lead to us being declared
                    // down due to this if our new incarnation doesn't reach
                    // them, but we shouldn't try to bump our incarnation again
                    // else we risk entering a game of counting
                    Ordering::Greater => {
                        #[cfg(feature = "tracing")]
                        tracing::trace!(
                            ?self.incarnation,
                            suspected = incarnation,
                            "Ignored suspicion about old incarnation",
                        );
                        return Ok(());
                    }

                    // Unexpected: someone suspects our identity but thinks we were
                    // in a higher incarnation. May happen due to members flapping,
                    // but can also be a sign of a bad actor (multiple identical
                    // identities, clients bumping identities from other members,
                    // corrupted data, etc)
                    // We'll emit a warning and then refute the suspicion normally
                    Ordering::Less => {
                        #[cfg(feature = "tracing")]
                        tracing::warn!(
                            ?self.incarnation,
                            suspected = incarnation,
                            "Suspicion on incarnation higher than current",
                        );
                    }

                    // The usual case: our current incarnation is being suspected,
                    // so we need to bump ours.
                    Ordering::Equal => {}
                };

                let incarnation = Incarnation::max(incarnation, self.incarnation);

                // We need to rejoin the cluster when this situation happens
                // because it will be impossible to refute suspicion
                if incarnation == Incarnation::MAX {
                    if !self.attempt_rejoin(&mut runtime)? {
                        #[cfg(feature = "tracing")]
                        tracing::warn!("Inactive: reached Incarnation::MAX",);
                        self.become_undead(runtime);
                    }
                    return Ok(());
                }

                // XXX Overzealous checking
                // (the MAX check above already guarantees no overflow)
                self.incarnation = incarnation.saturating_add(1);

                // We do NOT add ourselves as Alive to the updates buffer
                // because it's unnecessary: by bumping our incarnation *any*
                // message we send will be interpreted as a broadcast update
                // See: `tests::message_from_aware_suspect_refutes_suspicion`
            }
            State::Alive => {
                // The cluster is talking about our liveness. Nothing to do.
            }
            State::Down => {
                // It's impossible to refute a Down state so we'll need
                // to rejoin somehow
                if !self.attempt_rejoin(&mut runtime)? {
                    self.become_undead(runtime);
                }
            }
        }
        Ok(())
    }
+
+ fn attempt_rejoin(&mut self, mut runtime: impl Runtime<T>) -> Result<bool> {
+ if let Some(new_identity) = self.identity.renew() {
+ if self.identity == new_identity {
+ #[cfg(feature = "tracing")]
+ tracing::warn!("Rejoin failure: Identity::renew() returned same id",);
+ Ok(false)
+ } else {
+ self.change_identity(new_identity.clone(), &mut runtime)?;
+
+ runtime.notify(Notification::Rejoin(new_identity));
+
+ Ok(true)
+ }
+ } else {
+ Ok(false)
+ }
+ }
+}
+
/// Lifecycle of a Foca instance with respect to its cluster.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ConnectionState {
    /// No active members known; initial state, also reached when the
    /// last active member goes down.
    Disconnected,
    /// At least one active member known; probe cycle running.
    Connected,
    /// The cluster declared us Down and we couldn't rejoin; requires
    /// manual intervention (identity change / reuse) to recover.
    Undead,
}
+
/// A Broadcast Handler that rejects any form of custom broadcast.
///
/// Used by Foca when constructed via [`Foca::new()`]. Any trailing
/// bytes after the protocol payload yield [`BroadcastsDisabledError`].
pub struct NoCustomBroadcast;
+
/// Error emitted by [`NoCustomBroadcast`] when any trailing byte is
/// found. Will be wrapped by [`Error`]
#[derive(Debug, Clone, Copy)]
pub struct BroadcastsDisabledError;
+
+impl fmt::Display for BroadcastsDisabledError {
+ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+ formatter.write_str("Broadcasts disabled")
+ }
+}
+
// `std::error::Error` isn't available in no_std builds, hence the
// feature gate on this marker impl.
#[cfg(feature = "std")]
impl std::error::Error for BroadcastsDisabledError {}
+
impl BroadcastHandler for NoCustomBroadcast {
    // Never instantiated: receive_item always errors before producing
    // a broadcast, so any 'static byte-slice type works here.
    type Broadcast = &'static [u8];
    type Error = BroadcastsDisabledError;

    // Unconditionally rejects the payload: with broadcasts disabled,
    // trailing bytes can only mean a malformed or misdirected packet.
    fn receive_item(
        &mut self,
        _data: impl Buf,
    ) -> core::result::Result<Option<Self::Broadcast>, Self::Error> {
        Err(BroadcastsDisabledError)
    }
}
+
/// A pre-serialized cluster-state update queued for dissemination,
/// keyed by the member it talks about.
struct ClusterUpdate<T> {
    // Identity the update refers to; newer updates about the same
    // member replace older ones (see the Invalidates impl).
    member_id: T,
    // Codec-encoded Member<T>, ready to be copied into packets.
    data: Bytes,
}
+
impl<T: PartialEq> Invalidates for ClusterUpdate<T> {
    // State is managed externally (via Members), so invalidation
    // is a trivial replace-if-same-key
    fn invalidates(&self, other: &Self) -> bool {
        self.member_id == other.member_id
    }
}
+
+impl<T> AsRef<[u8]> for ClusterUpdate<T> {
+ fn as_ref(&self) -> &[u8] {
+ self.data.as_ref()
+ }
+}
+
// Test-only accessors: expose internal state so the testsuite can
// assert on it without widening the public API.
#[cfg(test)]
impl<T, C, RNG, B> Foca<T, C, RNG, B>
where
    T: Identity,
    C: Codec<T>,
    RNG: rand::Rng,
    B: BroadcastHandler,
{
    pub fn incarnation(&self) -> Incarnation {
        self.incarnation
    }

    pub fn probe(&self) -> &Probe<T> {
        &self.probe
    }

    pub fn timer_token(&self) -> TimerToken {
        self.timer_token
    }

    pub(crate) fn connection_state(&self) -> ConnectionState {
        self.connection_state
    }

    // Convenience wrapper over apply_many for single-update tests.
    pub(crate) fn apply(&mut self, member: Member<T>, mut runtime: impl Runtime<T>) -> Result<()> {
        self.apply_many(core::iter::once(member), &mut runtime)
    }
}
+
+#[cfg(test)]
+mod tests {
+
+ use super::*;
+ use alloc::vec;
+ use core::{
+ num::{NonZeroU8, NonZeroUsize},
+ time::Duration,
+ };
+
+ use bytes::{Buf, BufMut};
+ use rand::{rngs::SmallRng, SeedableRng};
+
+ use crate::testing::{BadCodec, InMemoryRuntime, ID};
+
    // Fixed seed so test runs are reproducible.
    fn rng() -> SmallRng {
        SmallRng::seed_from_u64(0xF0CA)
    }
+
    // Default codec for tests; see crate::testing for its semantics.
    fn codec() -> BadCodec {
        BadCodec
    }
+
    // Default configuration for tests.
    fn config() -> Config {
        Config::simple()
    }
+
+ fn encode(src: (Header<ID>, Vec<Member<ID>>)) -> Bytes {
+ let (header, updates) = src;
+ let mut codec = BadCodec;
+ let mut buf = BytesMut::new();
+
+ codec
+ .encode_header(&header, &mut buf)
+ .expect("MAYBE FIXME?");
+
+ if !updates.is_empty() {
+ buf.put_u16(u16::try_from(updates.len()).unwrap());
+ for member in updates.iter() {
+ codec.encode_member(member, &mut buf).expect("MAYBE FIXME?");
+ }
+ }
+
+ buf.freeze()
+ }
+
+ fn decode(mut src: impl Buf) -> (Header<ID>, Vec<Member<ID>>) {
+ let mut codec = BadCodec;
+ let header = codec.decode_header(&mut src).unwrap();
+
+ let mut updates = Vec::new();
+ if src.has_remaining() {
+ let num_items = src.get_u16();
+ updates.reserve(num_items.into());
+
+ for _i in 0..num_items {
+ updates.push(codec.decode_member(&mut src).unwrap());
+ }
+ }
+
+ (header, updates)
+ }
+
    // Sanity-checks the state of a freshly constructed instance and
    // the basic identity-change rules.
    #[test]
    fn invariants() {
        let identity = ID::new(42);
        let mut foca = Foca::new(identity, config(), rng(), codec());

        assert_eq!(ConnectionState::Disconnected, foca.connection_state());

        assert_eq!(0, foca.num_members());

        assert_eq!(None, foca.iter_members().next());

        // reuse_down_identity is only valid in the Undead state
        assert_eq!(Err(Error::NotUndead), foca.reuse_down_identity());

        let mut runtime = InMemoryRuntime::new();
        // Changing to the identity we already have is an error...
        assert_eq!(
            Err(Error::SameIdentity),
            foca.change_identity(identity, &mut runtime)
        );
        // ...but any distinct identity works
        assert_eq!(Ok(()), foca.change_identity(ID::new(43), &mut runtime));
        assert_eq!(&ID::new(43), foca.identity());
    }
+
    // The probe cycle is gated on being connected; firing its timer
    // while disconnected must error instead of silently no-opping.
    #[test]
    fn cant_probe_when_not_connected() {
        let mut foca = Foca::new(ID::new(1), config(), rng(), codec());

        let runtime = InMemoryRuntime::new();
        let res = foca.handle_timer(Timer::ProbeRandomMember(foca.timer_token()), runtime);

        assert_eq!(Err(Error::NotConnected), res);
    }
+
    // Verifies that codec failures surface as Error::Encode /
    // Error::Decode with the codec's own error wrapped inside.
    #[test]
    fn codec_errors_are_forwarded_correctly() {
        // A codec that only produces errors
        struct UnitErroringCodec;

        #[derive(Debug)]
        struct UnitError;

        impl fmt::Display for UnitError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.write_str("no")
            }
        }

        impl Codec<ID> for UnitErroringCodec {
            type Error = UnitError;

            fn encode_header(
                &mut self,
                _header: &Header<ID>,
                _buf: impl BufMut,
            ) -> core::result::Result<(), Self::Error> {
                Err(UnitError)
            }

            fn decode_header(
                &mut self,
                _buf: impl Buf,
            ) -> core::result::Result<Header<ID>, Self::Error> {
                Err(UnitError)
            }

            fn encode_member(
                &mut self,
                _member: &Member<ID>,
                _buf: impl BufMut,
            ) -> core::result::Result<(), Self::Error> {
                Err(UnitError)
            }

            fn decode_member(
                &mut self,
                _buf: impl Buf,
            ) -> core::result::Result<Member<ID>, Self::Error> {
                Err(UnitError)
            }
        }

        // And a runtime that does nothing to pair it with
        struct NoopRuntime;

        impl Runtime<ID> for NoopRuntime {
            fn notify(&mut self, _notification: Notification<ID>) {}
            fn send_to(&mut self, _to: ID, _data: &[u8]) {}
            fn submit_after(&mut self, _event: Timer<ID>, _after: Duration) {}
        }

        let mut foca = Foca::new(ID::new(1), Config::simple(), rng(), UnitErroringCodec);

        // announce() must encode a header, hitting the encode failure
        assert_eq!(
            Err(Error::Encode(anyhow::Error::msg(UnitError))),
            foca.announce(ID::new(2), NoopRuntime)
        );

        // handle_data() must decode a header, hitting the decode failure
        assert_eq!(
            Err(Error::Decode(anyhow::Error::msg(UnitError))),
            foca.handle_data(b"hue", NoopRuntime)
        );
    }
+
    // Asserts that `$runtime` holds a scheduling for `$timer` with
    // exactly `$after` delay, consuming it; panics otherwise.
    macro_rules! expect_scheduling {
        ($runtime: expr, $timer: expr, $after: expr) => {
            $runtime
                .take_scheduling($timer)
                .map(|after| assert_eq!(after, $after, "Incorrect scheduling for {:?}", $timer))
                .unwrap_or_else(|| panic!("Timer {:?} not found", $timer));
        };
    }
+
    // Asserts that `$runtime` emitted `$notification`, consuming it;
    // panics otherwise.
    macro_rules! expect_notification {
        ($runtime: expr, $notification: expr) => {
            $runtime
                .take_notification($notification)
                .unwrap_or_else(|| panic!("Notification {:?} not found", $notification));
        };
    }
+
    // Asserts that `$runtime` did NOT emit `$notification`.
    macro_rules! reject_notification {
        ($runtime: expr, $notification: expr) => {
            assert!(
                $runtime.take_notification($notification).is_none(),
                "Unwanted notification {:?} found",
                $notification
            );
        };
    }
+
    // Walks the full join subprotocol between two instances:
    // Announce from one side, Feed reply from the other, ending with
    // both aware of each other.
    #[test]
    fn can_join_with_another_client() {
        let mut foca_one = Foca::new(ID::new(1), config(), rng(), codec());
        let mut foca_two = Foca::new(ID::new(2), config(), rng(), codec());

        let mut runtime = InMemoryRuntime::new();

        // Here foca_one will send an announce packet to foca_two
        foca_one
            .announce(*foca_two.identity(), &mut runtime)
            .expect("no errors");

        assert_eq!(
            0,
            foca_one.num_members(),
            "announcing shouldn't change members"
        );

        // So the runtime should've been instructed to send a
        // message to foca_two

        let data = decode(
            runtime
                .take_data(ID::new(2))
                .expect("No data for ID::new(2) found"),
        );
        assert_eq!(data.0.message, Message::Announce);

        runtime.clear();
        foca_two
            .handle_data(&encode(data), &mut runtime)
            .expect("no errors");

        // Right now, foca_two should be aware of foca_one
        assert_eq!(1, foca_two.num_members());
        // Whilst foca_one is oblivious to the effect of its announce
        assert_eq!(0, foca_one.num_members());

        // So we should have gotten a notification about going online
        expect_notification!(runtime, Notification::<ID>::Active);
        expect_notification!(runtime, Notification::MemberUp(ID::new(1)));

        // And a event to trigger a probe should've been
        // scheduled
        expect_scheduling!(
            runtime,
            Timer::<ID>::ProbeRandomMember(foca_one.timer_token()),
            config().probe_period
        );

        // More importantly, the runtime should've been instructed to
        // send a feed to foca_one, which will finally complete its
        // join cycle
        let data = decode(
            runtime
                .take_data(ID::new(1))
                .expect("Feed for ID::new(1) not found"),
        );
        assert_eq!(data.0.message, Message::Feed);

        runtime.clear();
        assert_eq!(Ok(()), foca_one.handle_data(&encode(data), &mut runtime));

        // Receiving the Feed teaches foca_one about foca_two too
        expect_notification!(runtime, Notification::<ID>::Active);
        expect_notification!(runtime, Notification::MemberUp(ID::new(2)));
        assert_eq!(1, foca_one.num_members());
    }
+
    #[test]
    fn feed_contains_only_active_members() {
        // We'll make `foca_one` send an Announce to `foca_two` and verify
        // that its reply is a Feed containing its known *active* members
        let one = ID::new(1);
        let two = ID::new(2);
        let mut foca_one = Foca::new(one, config(), rng(), codec());
        let mut runtime = InMemoryRuntime::new();

        assert_eq!(Ok(()), foca_one.announce(two, &mut runtime));
        let data = runtime
            .take_data(two)
            .expect("Should have a message for foca_two");

        runtime.clear();

        // Members 3 and 4 are active, 5 is down and should not be
        // found in the feed
        let members = [
            Member::alive(ID::new(3)),
            Member::suspect(ID::new(4)),
            Member::down(ID::new(5)),
        ];

        let mut foca_two = Foca::new(two, config(), rng(), codec());

        // Let `foca_two` know about the members
        for member in members.iter() {
            assert_eq!(Ok(()), foca_two.apply(member.clone(), &mut runtime));
        }

        // Receive the packet from `foca_one`
        assert_eq!(Ok(()), foca_two.handle_data(&data, &mut runtime));

        let feed_data = runtime
            .take_data(one)
            .expect("Should have a message for foca_one");

        let (header, updates) = decode(feed_data);

        // Only the two active members (3 and 4) are stuffed in the Feed
        assert_eq!(header.message, Message::Feed);
        assert_eq!(2, updates.len());
        assert_eq!(
            members
                .iter()
                .cloned()
                .filter(|m| m.is_active())
                .collect::<Vec<_>>(),
            updates
        );
    }
+
    // Exercises update dissemination: every non-join message carries
    // cluster updates, and each update is transmitted at most
    // `max_transmissions` times before the backlog drains.
    #[test]
    fn piggyback_behaviour() {
        let max_transmissions = NonZeroU8::new(10).unwrap();
        let num_indirect_probes = NonZeroUsize::new(3).unwrap();

        let config = Config {
            max_transmissions,
            num_indirect_probes,
            ..config()
        };

        let mut foca = Foca::new(ID::new(1), config.clone(), rng(), codec());

        // A manually crafted Gossip packet from ID::new(2) addressed to
        // our foca instance
        let data = {
            let header = Header {
                src: ID::new(2),
                src_incarnation: 0,
                dst: ID::new(1),
                message: Message::Gossip,
            };
            // Containing these cluster updates:
            let updates = vec![
                Member::new(ID::new(3), 3, State::Alive),
                Member::new(ID::new(4), 1, State::Suspect),
                Member::new(ID::new(5), 1, State::Down),
            ];
            (header, updates)
        };

        let mut runtime = InMemoryRuntime::new();
        assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));

        expect_notification!(runtime, Notification::<ID>::Active);
        // We didn't know about any mentioned in the packet
        expect_notification!(runtime, Notification::MemberUp(ID::new(2)));
        expect_notification!(runtime, Notification::MemberUp(ID::new(3)));
        expect_notification!(runtime, Notification::MemberUp(ID::new(4)));
        // But an update about a Down member that we didn't know
        // about is cluster metadata only and shouldn't trigger
        // a notification
        reject_notification!(runtime, Notification::MemberDown(ID::new(5)));
        // It should, however, trigger a scheduling for forgetting
        // the member, so that they may rejoin the cluster
        expect_scheduling!(
            runtime,
            Timer::RemoveDown(ID::new(5)),
            config.remove_down_after
        );

        // 2 active members from the updates + the member that sent
        // the payload
        assert_eq!(3, foca.num_members());
        let mut members = foca.iter_members().cloned().collect::<Vec<_>>();
        members.sort_unstable();
        assert_eq!(vec![ID::new(2), ID::new(3), ID::new(4)], members);

        // Now, whenever we send a message that isn't part of the
        // join subprotocol (i.e.: not Feed nor Announce)
        // we should be emitting updates regardless of who we're
        // sending the message to.
        runtime.clear();
        assert_eq!(Ok(()), foca.gossip(&mut runtime));

        // When we gossip, we pick random `num_indirect_probes`
        // members to send them. And every update is disseminated
        // at most `max_transmissions` times.
        //
        // Since our ids are tiny, we know that every update
        // we have at the moment (the 3 that we received, plus
        // the discovery of the sender) will fit in a single
        // message.
        //
        // And since we just verified we have 3 active members,
        // which is exactly our fan out parameter
        // (`num_indirect_probes`), we expect that every
        // call to `gossip()` will drain 3 from the transmission
        // count of each update.
        //
        // So now we have `max_transmissions - 3` remaining
        // transmissions for each update.
        let mut remaining_tx = usize::from(max_transmissions.get()) - foca.num_members();

        assert_eq!(
            4,
            foca.updates_backlog(),
            "We should still have 4 updates in the backlog"
        );

        // let's gossip some more until we're in a more interesting
        // scenario
        while remaining_tx >= foca.num_members() {
            assert_eq!(Ok(()), foca.gossip(&mut runtime));

            remaining_tx -= foca.num_members();
            // So long as we have remaining_tx, the backlog should
            // remain the same
            assert_eq!(4, foca.updates_backlog());
        }

        assert!(remaining_tx < foca.num_members() && remaining_tx > 0);
        assert_eq!(4, foca.updates_backlog());

        // Now we sent enough broadcasts that we'll finally see the updates
        // backlog tank.
        // Since max_transmissions is set to 10 and every gossip() call
        // dropped 3:
        assert_eq!(1, remaining_tx);

        // Which means that the next gossip round should not only
        // finally drain the backlog: only one of the three Gossip
        // messages sent will contain our 4 updates. The remaining
        // should have no update at all.
        // (The value of an empty gossip message is questionable, but
        // since a valid message counts as a valid update it
        // essentially helps disseminate the knowledge of our
        // existence)
        runtime.clear();
        assert_eq!(Ok(()), foca.gossip(&mut runtime));

        let mut gossip_with_updates = 0;
        let mut empty_gossip = 0;

        for (_dst, data) in runtime.take_all_data() {
            let (header, updates) = decode(data);

            assert_eq!(Message::Gossip, header.message);
            if updates.is_empty() {
                empty_gossip += 1;
            } else {
                gossip_with_updates += 1;
            }
        }

        assert_eq!(1, gossip_with_updates);
        assert_eq!(2, empty_gossip);

        assert_eq!(0, foca.updates_backlog());
    }
+
    // A member entering the Down state must schedule its removal
    // (so it can rejoin later) exactly once per transition.
    #[test]
    fn new_down_member_triggers_remove_down_scheduling() -> Result<()> {
        let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
        let mut runtime = InMemoryRuntime::new();

        // ID::new(2) is new and down
        foca.apply(Member::down(ID::new(2)), &mut runtime)?;
        expect_scheduling!(
            runtime,
            Timer::RemoveDown(ID::new(2)),
            config().remove_down_after
        );

        // We already know about ID::new(2) being down. So we don't
        // want to schedule anything
        foca.apply(Member::down(ID::new(2)), &mut runtime)?;
        assert_eq!(
            None,
            runtime.take_scheduling(Timer::RemoveDown(ID::new(2))),
            "Must not duplicate removal scheduling"
        );

        // A new _active_ member must not trigger the scheduling
        foca.apply(Member::alive(ID::new(3)), &mut runtime)?;
        assert_eq!(
            None,
            runtime.take_scheduling(Timer::RemoveDown(ID::new(3))),
            "Must not schedule removal of active member ID=3"
        );

        // But it should trigger if we change it to down via an update
        foca.apply(Member::down(ID::new(3)), &mut runtime)?;
        expect_scheduling!(
            runtime,
            Timer::RemoveDown(ID::new(3)),
            config().remove_down_after
        );

        Ok(())
    }
+
    // Exhaustively walks the notification rules: MemberUp/MemberDown
    // on active-set changes, Active/Idle on connection transitions,
    // Defunct when the cluster declares us down.
    #[test]
    fn notification_triggers() -> Result<()> {
        let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
        let mut runtime = InMemoryRuntime::new();

        // Brand new member. The first in our set, so we should
        // also be notified about going active
        foca.apply(Member::alive(ID::new(2)), &mut runtime)?;
        expect_notification!(runtime, Notification::MemberUp(ID::new(2)));
        expect_notification!(runtime, Notification::<ID>::Active);

        // Updated/stale knowledge about an active member shouldn't
        // trigger a notification so long as it doesn't go down
        runtime.clear();
        foca.apply(Member::alive(ID::new(2)), &mut runtime)?;
        foca.apply(Member::suspect(ID::new(2)), &mut runtime)?;
        foca.apply(Member::new(ID::new(2), 10, State::Alive), &mut runtime)?;
        reject_notification!(runtime, Notification::MemberUp(ID::new(2)));
        reject_notification!(runtime, Notification::MemberDown(ID::new(2)));

        // Another new member
        runtime.clear();
        foca.apply(Member::suspect(ID::new(3)), &mut runtime)?;
        expect_notification!(runtime, Notification::MemberUp(ID::new(3)));
        reject_notification!(runtime, Notification::<ID>::Active);

        // Existing member going down
        runtime.clear();
        foca.apply(Member::down(ID::new(3)), &mut runtime)?;
        expect_notification!(runtime, Notification::MemberDown(ID::new(3)));

        // A stale update should trigger no notification
        runtime.clear();
        foca.apply(Member::down(ID::new(3)), &mut runtime)?;
        reject_notification!(runtime, Notification::MemberDown(ID::new(3)));

        // A new member, but already down, so no notification
        runtime.clear();
        foca.apply(Member::down(ID::new(4)), &mut runtime)?;
        reject_notification!(runtime, Notification::MemberDown(ID::new(4)));

        // Last active member going down, we're going idle
        runtime.clear();
        assert_eq!(1, foca.num_members());
        foca.apply(Member::down(ID::new(2)), &mut runtime)?;
        expect_notification!(runtime, Notification::MemberDown(ID::new(2)));
        expect_notification!(runtime, Notification::<ID>::Idle);

        // New active member, going back to active
        runtime.clear();
        foca.apply(Member::alive(ID::new(5)), &mut runtime)?;
        expect_notification!(runtime, Notification::MemberUp(ID::new(5)));
        expect_notification!(runtime, Notification::<ID>::Active);

        // Now someone declared us (ID=1) down, we should
        // go defunct
        runtime.clear();
        foca.apply(Member::down(ID::new(1)), &mut runtime)?;
        expect_notification!(runtime, Notification::<ID>::Defunct);
        // But since we're not part of the member list, there shouldn't
        // be a notification about our id going down
        reject_notification!(runtime, Notification::MemberDown(ID::new(1)));

        // While defunct, we can still maintain members,
        runtime.clear();
        foca.apply(Member::down(ID::new(5)), &mut runtime)?;
        expect_notification!(runtime, Notification::MemberDown(ID::new(5)));

        foca.apply(Member::alive(ID::new(6)), &mut runtime)?;
        expect_notification!(runtime, Notification::MemberUp(ID::new(6)));

        // But until manual intervention happens, we are not active
        reject_notification!(runtime, Notification::<ID>::Active);

        assert_eq!(Ok(()), foca.reuse_down_identity());
        // Now since we are not defunct anymore, any message
        // received, even if it's a stale update should
        // notify that we're active again
        runtime.clear();
        assert_eq!(1, foca.num_members());
        foca.apply(Member::alive(ID::new(6)), &mut runtime)?;
        expect_notification!(runtime, Notification::<ID>::Active);

        Ok(())
    }
+
    #[test]
    fn not_submitting_indirect_probe_timer_causes_probe_error() -> Result<()> {
        // The probe cycle requires two timer events:
        //
        // 1. Timer::ProbeRandomMember, which starts the probe
        // 2. Timer::SendIndirectProbe, which sends indirect
        //    probes IFF we haven't received a direct reply
        //
        // This test verifies that not submitting the second
        // timer event causes an error.
        let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
        let mut runtime = InMemoryRuntime::new();

        // Add an active member so that the probing can start
        foca.apply(Member::alive(ID::new(2)), &mut runtime)?;
        let probe_random_member = Timer::ProbeRandomMember(foca.timer_token());
        expect_scheduling!(runtime, probe_random_member.clone(), config().probe_period);

        // Start the probe now, instead of after `probe_period`
        runtime.clear();
        assert_eq!(
            Ok(()),
            foca.handle_timer(probe_random_member.clone(), &mut runtime)
        );

        // Which should instruct the runtime to trigger the second stage of
        // the probe after `probe_rtt`
        expect_scheduling!(
            runtime,
            Timer::SendIndirectProbe {
                probed_id: ID::new(2),
                token: foca.timer_token(),
            },
            config().probe_rtt
        );

        // But instead of triggering send_indirect_probe as instructed
        // we'll trigger probe_random_member again, simulating a
        // broken runtime
        assert_eq!(
            Err(Error::IncompleteProbeCycle),
            foca.handle_timer(probe_random_member, &mut runtime)
        );

        Ok(())
    }
+
    #[test]
    fn receiving_indirect_for_ourselves_causes_error() {
        // To pierce holes/partitions in the cluster the protocol
        // has a mechanism to request a member to talk to another
        // one on our behalf.
        //
        // This test verifies that if someone asks us to talk to
        // ourselves via this mechanism, an error occurs.
        let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
        let mut runtime = InMemoryRuntime::new();

        // Every message of the indirect-probe subprotocol, each
        // (incorrectly) pointing back at ourselves
        let probe_number = foca.probe().probe_number();
        let indirect_messages = vec![
            Message::PingReq {
                target: ID::new(1),
                probe_number,
            },
            Message::IndirectPing {
                origin: ID::new(1),
                probe_number,
            },
            Message::IndirectAck {
                target: ID::new(1),
                probe_number,
            },
            Message::ForwardedAck {
                origin: ID::new(1),
                probe_number,
            },
        ];

        for message in indirect_messages.into_iter() {
            let bad_header = Header {
                src: ID::new(2),
                src_incarnation: 0,
                dst: ID::new(1),
                message,
            };

            assert_eq!(
                Err(Error::IndirectForOurselves),
                foca.handle_data(&encode((bad_header, Vec::new())), &mut runtime)
            );
        }
    }
+
    // A packet whose src is our own identity is either a loop or a
    // spoof; handling it must fail.
    #[test]
    fn cant_receive_data_from_same_identity() {
        let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
        let mut runtime = InMemoryRuntime::new();

        assert_eq!(
            Err(Error::DataFromOurselves),
            foca.handle_data(
                &encode((
                    Header {
                        src: ID::new(1),
                        src_incarnation: 0,
                        dst: ID::new(1),
                        message: Message::Announce
                    },
                    Vec::new()
                )),
                &mut runtime
            )
        );
    }
+
+ #[test]
+ fn cant_receive_announce_with_extra_data() {
+ // An Announce packet carrying a non-empty updates payload
+ // is a protocol violation and must yield MalformedPacket.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ assert_eq!(
+ Err(Error::MalformedPacket),
+ foca.handle_data(
+ &encode((
+ Header {
+ src: ID::new(1),
+ src_incarnation: 0,
+ dst: ID::new(2),
+ message: Message::Announce
+ },
+ Vec::from([Member::alive(ID::new(3))])
+ )),
+ &mut runtime
+ )
+ );
+ }
+
+ #[test]
+ fn announce_to_wrong_id_is_accepted_if_same_prefix() {
+ // Joining a cluster involves sending a Announce message to a
+ // member we know about, that means that we need to know the
+ // exact identity of a member.
+ //
+ // Re-joining a cluster involves either waiting until the
+ // cluster forgets you went down or simply changing your
+ // identity.
+ //
+ // That's when things may get confusing: if we want to be
+ // able to rejoin a cluster fast, we need to be able to change
+ // identities; But if everyone can change identities, how
+ // can we send a valid Announce message?
+ //
+ // To facilitate this, we provide a mechanism to relax the
+ // check on Announce messages: if the packet was not addressed
+ // directly to us, but to an identity that "has the same prefix"
+ // we accept it.
+ //
+ // This mechanism is disabled by default. To enable it an
+ // identity must specialize the default implementation of
+ // the `has_same_prefix` method to yield `true` when they
+ // want.
+ //
+ // This test verifies that this mechanism actually works.
+
+ // This is our running Foca instance, with `target_id`. Nobody
+ // in the cluster knows that our bump is 255, but everyone
+ // knows about the ID::new(1) part.
+ let target_id = ID::new_with_bump(1, 255);
+ let codec = BadCodec;
+ let mut foca = Foca::new(target_id, config(), rng(), codec);
+ let mut runtime = InMemoryRuntime::new();
+
+ // Our goal is getting `src` to join `target_id`'s cluster.
+ let src_id = ID::new(2);
+
+ // We'll send a packet destined to the wrong id, not
+ // passing the "has same prefix" check to verify the join
+ // doesn't happen
+ let wrong_dst = ID::new(3);
+ assert!(!target_id.has_same_prefix(&wrong_dst));
+ let data = (
+ Header {
+ src: src_id,
+ src_incarnation: 0,
+ dst: wrong_dst,
+ message: Message::Announce,
+ },
+ Vec::new(),
+ );
+
+ // Whilst it won't cause any errors
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+ // The packet was simply ignored:
+ assert_eq!(0, foca.num_members());
+
+ // Now we'll send it to an identity that matches the same
+ // prefix check
+ let dst = ID::new_with_bump(1, 42);
+ assert_ne!(target_id, dst);
+ assert!(target_id.has_same_prefix(&dst));
+ let data = (
+ Header {
+ src: src_id,
+ src_incarnation: 0,
+ dst,
+ message: Message::Announce,
+ },
+ Vec::new(),
+ );
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+ // So we should've successfully joined
+ assert_eq!(1, foca.num_members());
+ assert!(foca.iter_members().any(|member| member == &src_id));
+ }
+
+ #[test]
+ fn suspicion_refutal() -> Result<()> {
+ // Exercises the SWIM refutal mechanism: being told we're a
+ // suspect bumps our incarnation, up to a hard ceiling.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ let original_incarnation = foca.incarnation();
+
+ // Update declaring we are suspect.
+ // We should be able to refute it simply by increasing
+ // our incarnation
+ foca.apply(Member::suspect(ID::new(1)), &mut runtime)?;
+ assert!(original_incarnation < foca.incarnation());
+
+ // Our incarnation may grow until a maximum level
+ foca.apply(
+ Member::new(ID::new(1), Incarnation::MAX - 1, State::Suspect),
+ &mut runtime,
+ )?;
+ assert_eq!(Incarnation::MAX, foca.incarnation());
+
+ // But if we live long enough, we may reach a point where
+ // the incarnation is too high to refute. When this
+ // happens, manual intervention is required.
+ foca.apply(
+ Member::new(ID::new(1), Incarnation::MAX, State::Suspect),
+ &mut runtime,
+ )?;
+ assert_eq!(ConnectionState::Undead, foca.connection_state());
+
+ Ok(())
+ }
+
+ #[test]
+ fn change_identity_gossips_immediately() {
+ // Changing our identity should disseminate the news right
+ // away instead of waiting for the next scheduled gossip.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ // Introduce a new member so we have someone to gossip to
+ assert_eq!(Ok(()), foca.apply(Member::alive(ID::new(2)), &mut runtime));
+
+ assert_eq!(Ok(()), foca.change_identity(ID::new(99), &mut runtime));
+
+ assert!(foca.updates_backlog() > 0);
+
+ // The only other member is ID=2, so the gossip must have
+ // been sent to it
+ let (header, updates) = decode(
+ runtime
+ .take_data(ID::new(2))
+ .expect("Should have sent a message to ID=2"),
+ );
+
+ assert_eq!(Message::Gossip, header.message);
+ assert!(!updates.is_empty());
+ }
+
+ // Simple helper to ease testing of the probe cycle
+ // Yields:
+ // .0: a foca instance, ID=1, with `num_members` active members
+ // .1: the member being probed
+ // .2: the event (SendIndirectProbe) to submit in order
+ // to continue the probe cycle
+ fn craft_probing_foca(
+ num_members: u8,
+ ) -> (
+ Foca<ID, BadCodec, SmallRng, NoCustomBroadcast>,
+ ID,
+ Timer<ID>,
+ ) {
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ assert!(num_members > 0);
+ // Assume some members exist
+ // Members get ids 2..=(num_members + 1); `num_members + 2`
+ // would overflow for num_members > 253, which is fine for
+ // the sizes used in these tests
+ for smallid in 2..(num_members + 2) {
+ foca.apply(Member::alive(ID::new(smallid)), &mut runtime)
+ .expect("infallible");
+ }
+
+ // The runtime should've been instructed to schedule a
+ // probe for later on
+ let expected_timer = Timer::ProbeRandomMember(foca.timer_token());
+ expect_scheduling!(runtime, expected_timer.clone(), config().probe_period);
+
+ // We'll trigger it right now instead
+ runtime.clear();
+ assert_eq!(Ok(()), foca.handle_timer(expected_timer, &mut runtime));
+
+ let probed = *foca.probe().target().expect("Probe should have started");
+
+ // Now we know which member is being probed. So we can verify
+ // that a ping message was sent to it:
+ let (header, _updates) = decode(
+ runtime
+ .take_data(probed)
+ .expect("Should have initiated a probe"),
+ );
+ assert!(matches!(header.message, Message::Ping(_)));
+
+ // We should also have received a scheduling request
+ // for when we should trigger the second stage of the
+ // probe
+ let send_indirect_probe = Timer::SendIndirectProbe {
+ probed_id: probed,
+ token: foca.timer_token(),
+ };
+ expect_scheduling!(runtime, send_indirect_probe.clone(), config().probe_rtt);
+
+ (foca, probed, send_indirect_probe)
+ }
+
+ #[test]
+ fn probe_ping_ack_cycle() {
+ // Direct probe happy path: the probed member Acks before
+ // the indirect stage would kick in.
+ let (mut foca, probed, send_indirect_probe) = craft_probing_foca(5);
+ let mut runtime = InMemoryRuntime::new();
+
+ // Now if probed replies before the timer fires, the probe
+ // should complete and the indirect probe cycle shouldn't
+ // start.
+ let data = (
+ Header {
+ src: probed,
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: Message::Ack(foca.probe().probe_number()),
+ },
+ Vec::new(),
+ );
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+
+ assert_eq!(Ok(()), foca.handle_timer(send_indirect_probe, &mut runtime));
+
+ assert!(
+ foca.probe().succeeded(),
+ "probe should have succeeded after Ack"
+ );
+ }
+
+ #[test]
+ fn probe_cycle_requires_correct_probe_number() {
+ // An Ack that doesn't echo the current probe number must
+ // not complete the probe.
+ let (mut foca, probed, send_indirect_probe) = craft_probing_foca(5);
+ let mut runtime = InMemoryRuntime::new();
+
+ let incorrect_probe_number = foca.probe().probe_number() + 1;
+ assert_ne!(incorrect_probe_number, foca.probe().probe_number());
+
+ // An Ack payload akin to the one in `tests::probe_ping_ack_cycle`,
+ // but with an incorrect probe number
+ let data = (
+ Header {
+ src: probed,
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: Message::Ack(incorrect_probe_number),
+ },
+ Vec::new(),
+ );
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+
+ assert_eq!(Ok(()), foca.handle_timer(send_indirect_probe, &mut runtime));
+
+ assert!(
+ !foca.probe().succeeded(),
+ "Ack with incorrect probe number should be discarded"
+ );
+ }
+
+ #[test]
+ fn probe_valid_indirect_ack_completes_succesfully() {
+ // Like `probe_ping_ack_cycle` but instead of a successful
+ // direct refutal via Ack, we'll stress the indirect mechanism
+ // that kicks off after SendIndirectProbe is accepted
+ let num_indirect_probes = config().num_indirect_probes.get();
+ // We create a cluster with _more_ active members than
+ // `num_indirect_probes + 1` so that we can verify that
+ // we don't send more requests than the configured value.
+ let (mut foca, probed, send_indirect_probe) =
+ craft_probing_foca((num_indirect_probes + 2) as u8);
+ let mut runtime = InMemoryRuntime::new();
+
+ // `probed` did NOT reply with an Ack before the timer
+ assert_eq!(Ok(()), foca.handle_timer(send_indirect_probe, &mut runtime));
+
+ // Collect every member that got a PingReq, ensuring none of
+ // them is the probed member or ourselves
+ let mut ping_req_dsts = Vec::new();
+ let all_data = runtime.take_all_data();
+ for (to, data) in all_data.into_iter() {
+ let (header, _updates) = decode(data);
+
+ if matches!(
+ header.message,
+ Message::PingReq {
+ target: _,
+ probe_number: _
+ }
+ ) {
+ assert_ne!(
+ to, probed,
+ "Must not request a ping to the member being probed"
+ );
+
+ assert_ne!(
+ to,
+ foca.identity().clone(),
+ "Must not request a ping to ourselves"
+ );
+ ping_req_dsts.push(to);
+ }
+ }
+ assert_eq!(num_indirect_probes, ping_req_dsts.len());
+ runtime.clear();
+
+ // Now the probe can succeed via:
+ //
+ // 1. A direct ack coming from `probed`
+ // 2. A forwarded ack coming from ANY of the members we sent
+ // a ping request to
+ //
+ // For this indirect scenario we'll verify that:
+ //
+ // 1. A ForwardedAck from a member we did NOT send a ping
+ // request to gets ignored
+ //
+ // 2. A well-formed ForwardedAck makes the probe succeed
+ let outsider = ID::new(42);
+ assert!(ping_req_dsts.iter().all(|id| id != &outsider));
+ let forwarded_ack = Message::ForwardedAck {
+ origin: probed,
+ probe_number: foca.probe().probe_number(),
+ };
+
+ assert_eq!(
+ Ok(()),
+ foca.handle_data(
+ &encode((
+ Header {
+ src: outsider,
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: forwarded_ack.clone(),
+ },
+ Vec::new(),
+ )),
+ &mut runtime,
+ )
+ );
+
+ assert!(
+ !foca.probe().succeeded(),
+ "Must not accept ForwardedAck from outsider"
+ );
+
+ for src in ping_req_dsts.into_iter() {
+ assert_eq!(
+ Ok(()),
+ foca.handle_data(
+ &encode((
+ Header {
+ src,
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: forwarded_ack.clone(),
+ },
+ Vec::new(),
+ )),
+ &mut runtime,
+ )
+ );
+
+ // Only one ack is necessary for the probe to succeed
+ assert!(
+ foca.probe().succeeded(),
+ "Probe should succeed with any expected ForwardedAck"
+ );
+ }
+ }
+
+ #[test]
+ fn probe_receiving_ping_replies_with_ack() {
+ // Receiving a Ping must be answered with an Ack echoing
+ // the same probe number back to the sender.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ let probe_number = foca.probe().probe_number();
+ let data = (
+ Header {
+ src: ID::new(2),
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: Message::Ping(probe_number),
+ },
+ Vec::new(),
+ );
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+
+ let (header, _updates) = decode(runtime.take_data(ID::new(2)).unwrap());
+ assert_eq!(header.message, Message::Ack(probe_number));
+ }
+
+ #[test]
+ fn probe_receiving_ping_req_sends_indirect_ping() {
+ // A PingReq from ID=2 targeting ID=3 must make us send
+ // an IndirectPing to ID=3 on ID=2's behalf.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ let probe_number = foca.probe().probe_number();
+ let data = (
+ Header {
+ src: ID::new(2),
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: Message::PingReq {
+ target: ID::new(3),
+ probe_number,
+ },
+ },
+ Vec::new(),
+ );
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+
+ let (header, _updates) = decode(runtime.take_data(ID::new(3)).unwrap());
+ assert_eq!(
+ header.message,
+ Message::IndirectPing {
+ origin: ID::new(2),
+ probe_number
+ }
+ );
+ }
+
+ #[test]
+ fn probe_receiving_indirect_ping_sends_indirect_ack() {
+ // An IndirectPing relayed by ID=2 (on behalf of ID=3) must
+ // be answered with an IndirectAck sent back to the relay.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ let probe_number = foca.probe().probe_number();
+ let data = (
+ Header {
+ src: ID::new(2),
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: Message::IndirectPing {
+ origin: ID::new(3),
+ probe_number,
+ },
+ },
+ Vec::new(),
+ );
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+
+ let (header, _updates) = decode(runtime.take_data(ID::new(2)).unwrap());
+ assert_eq!(
+ header.message,
+ Message::IndirectAck {
+ target: ID::new(3),
+ probe_number
+ }
+ );
+ }
+
+ #[test]
+ fn probe_receiving_indirect_ack_sends_forwarded_ack() {
+ // An IndirectAck from ID=2 destined to ID=3 must be relayed
+ // to ID=3 as a ForwardedAck carrying the original sender.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ let probe_number = foca.probe().probe_number();
+ let data = (
+ Header {
+ src: ID::new(2),
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: Message::IndirectAck {
+ target: ID::new(3),
+ probe_number,
+ },
+ },
+ Vec::new(),
+ );
+ assert_eq!(Ok(()), foca.handle_data(&encode(data), &mut runtime));
+
+ let (header, _updates) = decode(runtime.take_data(ID::new(3)).unwrap());
+ assert_eq!(
+ header.message,
+ Message::ForwardedAck {
+ origin: ID::new(2),
+ probe_number
+ }
+ );
+ }
+
+ #[test]
+ fn message_from_aware_suspect_refutes_suspicion() -> Result<()> {
+ // Scenario: 3-member active cluster
+ // - One of the members will suspect the other two
+ // - Only one of the suspected members will learn about the suspicion
+ let mut herd = {
+ let mut herd = Vec::new();
+ let members = [
+ Member::alive(ID::new(1)),
+ Member::alive(ID::new(2)),
+ Member::alive(ID::new(3)),
+ ];
+
+ // Built in reverse so that pop() below yields ID=1 first
+ for member in members.iter().rev() {
+ let mut foca = Foca::new(*member.id(), config(), rng(), codec());
+ foca.apply_many(members.iter().cloned(), InMemoryRuntime::new())?;
+ herd.push(foca)
+ }
+
+ herd
+ };
+
+ let mut foca_one = herd.pop().unwrap();
+ let mut foca_two = herd.pop().unwrap();
+ let mut foca_three = herd.pop().unwrap();
+
+ let one = *foca_one.identity();
+ let two = *foca_two.identity();
+ let three = *foca_three.identity();
+
+ // foca_one starts suspecting two and three
+ let mut runtime = InMemoryRuntime::new();
+ foca_one.apply(Member::suspect(two), &mut runtime)?;
+ foca_one.apply(Member::suspect(three), &mut runtime)?;
+ assert_eq!(2, foca_one.num_members());
+
+ // But only foca_three learns that it's being suspected
+ // (Likely learned about ID=2 too, but that's irrelevant)
+ foca_three.apply(Member::suspect(three), &mut runtime)?;
+
+ // `foca_two` messages `foca_one`
+ runtime.clear();
+ assert_eq!(Ok(()), foca_two.announce(one, &mut runtime));
+ let data = runtime
+ .take_data(one)
+ .expect("foca_two sending data to ID::new(1)");
+
+ assert_eq!(Ok(()), foca_one.handle_data(&data, &mut runtime));
+
+ // same for `foca_three`
+ runtime.clear();
+ assert_eq!(Ok(()), foca_three.announce(one, &mut runtime));
+ let data = runtime
+ .take_data(one)
+ .expect("foca_three sending data to ID::new(1)");
+ assert_eq!(Ok(()), foca_one.handle_data(&data, &mut runtime));
+
+ // Now `foca_one` has received messages from both members
+ // and our runtime triggered the timer to change suspect
+ // member to down
+
+ // timer event related to `foca_two`
+ runtime.clear();
+ assert_eq!(
+ Ok(()),
+ foca_one.handle_timer(
+ Timer::ChangeSuspectToDown {
+ member_id: two,
+ incarnation: Incarnation::default(),
+ token: foca_one.timer_token()
+ },
+ &mut runtime
+ )
+ );
+ // foca_two hasn't refuted the suspicion, so `foca_one` should
+ // have marked it as down
+ expect_notification!(runtime, Notification::MemberDown(two));
+ assert_eq!(1, foca_one.num_members());
+ assert!(
+ foca_one.iter_members().all(|id| id != &two),
+ "foca_two shouldn't be in the member list anymore"
+ );
+
+ // But `foca_three` knew about it, and its message should've
+ // been enough to remain active
+ assert_eq!(
+ Ok(()),
+ foca_one.handle_timer(
+ Timer::ChangeSuspectToDown {
+ member_id: three,
+ incarnation: Incarnation::default(),
+ token: foca_one.timer_token()
+ },
+ &mut runtime
+ )
+ );
+ assert_eq!(1, foca_one.num_members());
+ assert!(
+ foca_one.iter_members().any(|id| id == &three),
+ "foca_three should have recovered"
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn leave_cluster_gossips_about_our_death() -> Result<()> {
+ // Leaving politely means disseminating our own Down state
+ // before going away.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ foca.apply(Member::alive(ID::new(2)), &mut runtime)?;
+
+ assert_eq!(Ok(()), foca.leave_cluster(&mut runtime));
+
+ // Since we only have ID::new(2) as an active member, we know that
+ // `leave_cluster` should have sent a message to it
+ let (header, updates) = decode(
+ runtime
+ .take_data(ID::new(2))
+ .expect("No message for ID::new(2) found"),
+ );
+ assert_eq!(Message::Gossip, header.message);
+
+ assert!(
+ updates
+ .iter()
+ .any(|update| update.id() == &ID::new(1) && update.state() == State::Down),
+ "Gossip message should contain an update about our exit"
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn leave_cluster_doesnt_gossip_to_duplicates() -> Result<()> {
+ // We want to gossip to 5 distinct members when leaving
+ let config = Config {
+ num_indirect_probes: NonZeroUsize::new(5).unwrap(),
+ ..Config::simple()
+ };
+
+ let mut foca = Foca::new(ID::new(1), config, rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ // And only have one
+ foca.apply(Member::alive(ID::new(2)), &mut runtime)?;
+
+ assert_eq!(Ok(()), foca.leave_cluster(&mut runtime));
+
+ assert!(
+ runtime.take_data(ID::new(2)).is_some(),
+ "Should have one message for ID::new(2)"
+ );
+ assert!(
+ runtime.take_data(ID::new(2)).is_none(),
+ "But never more than one to the same member"
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn auto_rejoin_behaviour() {
+ // Learning that our own (rejoinable) identity is Down should
+ // trigger an automatic identity change plus dissemination.
+ let mut foca = Foca::new(ID::new(1).rejoinable(), config(), rng(), codec());
+ let mut runtime = InMemoryRuntime::new();
+
+ let updates = [
+ // New known members
+ Member::alive(ID::new(2)),
+ Member::alive(ID::new(3)),
+ Member::alive(ID::new(4)),
+ // Us, being down
+ Member::down(ID::new(1)),
+ ];
+
+ assert_eq!(
+ Ok(()),
+ foca.apply_many(updates.iter().cloned(), &mut runtime)
+ );
+
+ // Change our identity
+ let expected_new_id = ID::new_with_bump(1, 1);
+ assert_eq!(&expected_new_id, foca.identity());
+ expect_notification!(runtime, Notification::Rejoin(expected_new_id));
+ reject_notification!(runtime, Notification::<ID>::Defunct);
+
+ // And disseminate our new identity to K members
+ let to_send = runtime.take_all_data();
+ assert!(to_send.into_iter().any(|(_dst, data)| {
+ let (header, _updates) = decode(data);
+ header.message == Message::Gossip
+ }));
+ }
+
+ // TODO duplicate renew() identity no error test
+
+ #[test]
+ fn more_data_than_allowed_causes_error() {
+ // Both ingress (handle_data) and egress (add_broadcast)
+ // must reject payloads larger than `max_packet_size`.
+ let config = config();
+ let max_bytes = config.max_packet_size.get();
+
+ let mut foca = Foca::new(ID::new(1), config, rng(), codec());
+
+ let large_data = vec![42u8; max_bytes + 1];
+
+ assert_eq!(
+ Err(Error::DataTooBig),
+ foca.handle_data(&large_data[..], InMemoryRuntime::new())
+ );
+
+ assert_eq!(Err(Error::DataTooBig), foca.add_broadcast(&large_data[..]));
+ }
+
+ #[test]
+ fn cant_use_broadcasts_by_default() {
+ // Without a custom BroadcastHandler configured, adding a
+ // broadcast must always fail.
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+ assert!(foca.add_broadcast(b"foo").is_err());
+ }
+
+ #[test]
+ fn trailing_data_is_error() {
+ // We'll prepare some data that's actually valid
+ let valid_data = encode((
+ Header {
+ src: ID::new(2),
+ src_incarnation: Incarnation::default(),
+ dst: ID::new(1),
+ message: Message::Ping(0),
+ },
+ vec![Member::alive(ID::new(3)), Member::down(ID::new(4))],
+ ));
+
+ let mut foca = Foca::new(ID::new(1), config(), rng(), codec());
+
+ assert_eq!(
+ Ok(()),
+ foca.handle_data(valid_data.as_ref(), InMemoryRuntime::new()),
+ "valid_data should be valid :-)"
+ );
+
+ // Now we'll append some rubbish to it, so that everything
+ // is still valid up to the trash.
+ let mut bad_data = Vec::from(valid_data.as_ref());
+
+ // A single trailing byte should be enough to trigger an error
+ bad_data.push(0);
+
+ // The asserted error suggests the trailing bytes get handed
+ // to the custom broadcast handler, which is disabled by
+ // default — hence CustomBroadcast(BroadcastsDisabledError)
+ assert_eq!(
+ Err(Error::CustomBroadcast(anyhow::Error::msg(
+ BroadcastsDisabledError
+ ))),
+ foca.handle_data(bad_data.as_ref(), InMemoryRuntime::new()),
+ );
+ }
+
+ #[test]
+ fn custom_broadcast() {
+ // Here we'll do some basic testing of the custom broadcast
+ // functionality.
+
+ // This is the item that gets broadcast. It's pretty useless
+ // as it is: just an identifier and a version.
+ #[derive(Debug)]
+ struct VersionedKey {
+ // A realistic broadcast would contain actual data
+ // but we're not interested in the contents here,
+ // just how it all behaves.
+ data: [u8; 10],
+ }
+
+ impl VersionedKey {
+ fn new(key: u64, version: u16) -> Self {
+ let mut data = [0u8; 10];
+ let mut buf = &mut data[..];
+ buf.put_u64(key);
+ buf.put_u16(version);
+ Self { data }
+ }
+
+ fn key(&self) -> u64 {
+ let mut buf = &self.data[..];
+ buf.get_u64()
+ }
+
+ fn version(&self) -> u16 {
+ let mut buf = &self.data[8..];
+ buf.get_u16()
+ }
+
+ fn from_bytes(mut src: impl Buf) -> core::result::Result<Self, &'static str> {
+ if src.remaining() < 10 {
+ Err("buffer too small")
+ } else {
+ let mut data = [0u8; 10];
+ let mut buf = &mut data[..];
+ buf.put_u64(src.get_u64());
+ buf.put_u16(src.get_u16());
+ Ok(Self { data })
+ }
+ }
+ }
+
+ // Invalidation based on same key => higher version
+ impl Invalidates for VersionedKey {
+ fn invalidates(&self, other: &Self) -> bool {
+ self.key() == other.key() && self.version() > other.version()
+ }
+ }
+
+ impl AsRef<[u8]> for VersionedKey {
+ fn as_ref(&self) -> &[u8] {
+ &self.data[..]
+ }
+ }
+
+ // Notice how we don't need to cache the full broadcast here:
+ // if VersionedKey were very large it wouldn't matter; all we
+ // care about is whether the broadcast is new information or
+ // not.
+ use alloc::collections::BTreeMap;
+ struct Handler(BTreeMap<u64, u16>);
+
+ impl BroadcastHandler for Handler {
+ type Broadcast = VersionedKey;
+
+ type Error = &'static str;
+
+ fn receive_item(
+ &mut self,
+ data: impl Buf,
+ ) -> core::result::Result<Option<Self::Broadcast>, Self::Error> {
+ let decoded = VersionedKey::from_bytes(data)?;
+
+ let is_new_information = self
+ .0
+ .get(&decoded.key())
+ // If the version we know about is smaller
+ .map(|&version| version < decoded.version())
+ // Or we never seen the key before
+ .unwrap_or(true);
+
+ if is_new_information {
+ self.0.insert(decoded.key(), decoded.version());
+ Ok(Some(decoded))
+ } else {
+ Ok(None)
+ }
+ }
+ }
+
+ // Now we can make use of our custom broadcasts
+ let mut foca = Foca::with_custom_broadcast(
+ ID::new(1),
+ config(),
+ rng(),
+ codec(),
+ Handler(BTreeMap::new()),
+ );
+
+ assert!(
+ foca.add_broadcast(b"hue").is_err(),
+ "Adding garbage shouldn't work"
+ );
+
+ assert_eq!(
+ Ok(()),
+ foca.add_broadcast(VersionedKey::new(420, 0).as_ref()),
+ );
+
+ assert_eq!(
+ 1,
+ foca.custom_broadcast_backlog(),
+ "Adding a new custom broadcast should increase the backlog"
+ );
+
+ assert_eq!(
+ Ok(()),
+ foca.add_broadcast(VersionedKey::new(420, 1).as_ref()),
+ );
+
+ assert_eq!(
+ 1,
+ foca.custom_broadcast_backlog(),
+ "But receiving a new version should simply replace the existing one"
+ );
+
+ // Let's add one more custom broadcast because testing with N=1
+ // is pretty lousy :-)
+ assert_eq!(
+ Ok(()),
+ foca.add_broadcast(VersionedKey::new(710, 1).as_ref()),
+ );
+
+ // Now let's see if the custom broadcasts actually get
+ // disseminated.
+ let other_id = ID::new(2);
+ let mut other_foca = Foca::with_custom_broadcast(
+ other_id,
+ config(),
+ rng(),
+ codec(),
+ Handler(BTreeMap::new()),
+ );
+
+ // Teach the original foca about this new `other_foca`
+ let mut runtime = InMemoryRuntime::new();
+ assert_eq!(Ok(()), foca.apply(Member::alive(other_id), &mut runtime));
+
+ // Now foca will talk to other_foca. The encoded data
+ // should contain our custom broadcasts.
+ assert_eq!(Ok(()), foca.gossip(&mut runtime));
+ let data_for_another_foca = runtime
+ .take_data(other_id)
+ .expect("foca only knows about other_foca");
+
+ assert_eq!(
+ 0,
+ other_foca.custom_broadcast_backlog(),
+ "other_foca custom broadcast backlog should start empty"
+ );
+
+ assert_eq!(
+ Ok(()),
+ other_foca.handle_data(&data_for_another_foca, &mut runtime)
+ );
+
+ assert_eq!(
+ 2,
+ other_foca.custom_broadcast_backlog(),
+ "Should have received two new custom broadcasts"
+ );
+ }
+}
Created src/member.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+extern crate alloc;
+use alloc::vec::Vec;
+
+use rand::{
+ prelude::{IteratorRandom, SliceRandom},
+ Rng,
+};
+
+/// State describes how a Foca instance perceives a member of the cluster.
+///
+/// This is part of the Suspicion Mechanism described in section 4.2 of the
+/// original SWIM paper.
+#[derive(Debug, PartialEq, Clone, Copy)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum State {
+ /// Member is active.
+ Alive,
+ /// Member is active, but at least one cluster member
+ /// suspects it's down. For all purposes, a `Suspect` member
+ /// is treated as if it were `Alive` until either it
+ /// refutes the suspicion (becoming `Alive`) or fails to
+ /// do so (being declared `Down`).
+ Suspect,
+ /// Confirmed Down.
+ /// A member that reaches this state can't join the cluster
+ /// with the same identity until the cluster forgets
+ /// this knowledge.
+ Down,
+}
+
+/// Incarnation is a member-controlled cluster-global number attached
+/// to a member identity.
+/// A member M's incarnation starts with zero and can only be incremented
+/// by said member M when refuting suspicion.
+/// See [`State`] for the suspicion mechanism this participates in.
+pub type Incarnation = u16;
+
+/// A Cluster Member. Also often called "cluster update".
+///
+/// A [`Member`] represents Foca's snapshot knowledge about an
+/// [`crate::Identity`]. An individual cluster update is simply a
+/// serialized Member which other Foca instances receive and use to
+/// update their own cluster state representation.
+#[derive(Debug, Clone, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct Member<T> {
+ // Unique identity of the member within the cluster
+ id: T,
+ // Incarnation the member was last seen at
+ incarnation: Incarnation,
+ // Perceived liveness (Alive/Suspect/Down)
+ state: State,
+}
+
+impl<T> Member<T> {
+ /// Initializes a new member.
+ ///
+ /// `id` is an identity used to uniquely identify an individual
+ /// cluster member (say, a primary key).
+ pub fn new(id: T, incarnation: Incarnation, state: State) -> Self {
+ Self {
+ id,
+ incarnation,
+ state,
+ }
+ }
+
+ /// Shortcut for initializing a member as [`State::Alive`].
+ pub fn alive(id: T) -> Self {
+ Self::new(id, Incarnation::default(), State::Alive)
+ }
+
+ // Test-only shortcut for a default-incarnation Suspect member
+ #[cfg(test)]
+ pub(crate) fn suspect(id: T) -> Self {
+ Self::new(id, Incarnation::default(), State::Suspect)
+ }
+
+ // Shortcut for a default-incarnation Down member
+ pub(crate) fn down(id: T) -> Self {
+ Self::new(id, Incarnation::default(), State::Down)
+ }
+
+ /// Getter for the member's Incarnation
+ pub fn incarnation(&self) -> Incarnation {
+ self.incarnation
+ }
+
+ /// Getter for the member's State
+ pub fn state(&self) -> State {
+ self.state
+ }
+
+ /// Getter for the member's identity
+ pub fn id(&self) -> &T {
+ &self.id
+ }
+
+ // A member counts as active while Alive or Suspect
+ pub(crate) fn is_active(&self) -> bool {
+ match self.state {
+ State::Alive | State::Suspect => true,
+ State::Down => false,
+ }
+ }
+
+ // Applies `incarnation`/`state` if the transition is allowed
+ // by `can_change`; returns whether anything was updated
+ pub(crate) fn change_state(&mut self, incarnation: Incarnation, state: State) -> bool {
+ if self.can_change(incarnation, state) {
+ self.state = state;
+ self.incarnation = incarnation;
+ true
+ } else {
+ false
+ }
+ }
+
+ fn can_change(&self, other_incarnation: Incarnation, other: State) -> bool {
+ // This implements the order of preference of the Suspicion subprotocol
+ // outlined on section 4.2 of the paper:
+ // Down always wins, Down itself is final, and Suspect overrides
+ // Alive at the same incarnation.
+ match self.state {
+ State::Alive => match other {
+ State::Alive => other_incarnation > self.incarnation,
+ State::Suspect => other_incarnation >= self.incarnation,
+ State::Down => true,
+ },
+ State::Suspect => match other {
+ State::Alive => other_incarnation > self.incarnation,
+ State::Suspect => other_incarnation > self.incarnation,
+ State::Down => true,
+ },
+ State::Down => false,
+ }
+ }
+
+ // Consumes the member, yielding only its identity
+ pub(crate) fn into_identity(self) -> T {
+ self.id
+ }
+}
+
+// The full membership list plus the round-robin state used to
+// pick probe targets
+pub(crate) struct Members<T> {
+ // Every known member, active or not
+ inner: Vec<Member<T>>,
+ // Round-robin position for `next()`; past-the-end triggers a shuffle
+ cursor: usize,
+ // Cached count of members where `is_active()` holds
+ num_active: usize,
+}
+
+#[cfg(test)]
+impl<T> Members<T> {
+ // Test-only helper: total number of known members (active + down)
+ pub fn len(&self) -> usize {
+ self.inner.len()
+ }
+}
+
+impl<T: PartialEq + Clone> Members<T> {
+ // Number of members currently considered active (Alive or Suspect)
+ pub fn num_active(&self) -> usize {
+ self.num_active
+ }
+
+ pub fn new(inner: Vec<Member<T>>) -> Self {
+ // XXX This doesn't prevent someone initializing with
+ // duplicated members... Not a problem (yet?) since
+ // inner is always empty outside of tests
+ let num_active = inner.iter().filter(|member| member.is_active()).count();
+
+ Self {
+ cursor: 0,
+ num_active,
+ inner,
+ }
+ }
+
+ // Next member that's considered active
+ // Chosen at random (shuffle + round-robin)
+ pub fn next(&mut self, mut rng: impl Rng) -> Option<&Member<T>> {
+ // Round-robin with a shuffle at the end
+ if self.cursor >= self.inner.len() {
+ self.inner.shuffle(&mut rng);
+ self.cursor = 0;
+ }
+
+ // Find an active member from cursor..len()
+ let position = self
+ .inner
+ .iter()
+ .skip(self.cursor)
+ .position(|m| m.is_active())
+ // Since we skip(), position() will start counting from zero
+ // this ensures it's actually the index of the chosen member
+ .map(|pos| pos + self.cursor);
+
+ // And if we don't find any: try from 0..cursor
+ let position = position.or_else(|| {
+ self.inner
+ .iter()
+ .take(self.cursor)
+ .position(|m| m.is_active())
+ });
+
+ if let Some(pos) = position {
+ if pos < self.cursor {
+ // We wrapped around the list to find a member. A shuffle
+ // is needed, so we set it to MAX. Any other value could
+ // cause the shuffle to not happen since members may join
+ // in-between probes
+ self.cursor = core::usize::MAX;
+ } else {
+ self.cursor = pos.saturating_add(1);
+ }
+ self.inner.get(pos)
+ } else {
+ None
+ }
+ }
+
+ /// XXX This used to be a next_members() which would make use of the
+ /// already shuffled state and then simply advance the cursor
+ /// to trigger the next shuffle-after-round-robin that next()
+ /// does. However I'm not sure it was a good idea: the point
+ /// of what next() does is giving some sort of determinism giving
+ /// a high chance that every member will be *pinged* periodically
+ /// and using the same logic for other "pick random member"
+ /// mechanisms might break the math.
+ // Picks up to `wanted` distinct active members that satisfy
+ // `picker`, pushing them into `output`. Fewer may be chosen if
+ // not enough candidates exist.
+ // NOTE(review): the replacement indices below assume `output`
+ // starts empty — confirm callers never pass a pre-filled vec
+ pub fn choose_active_members<F>(
+ &mut self,
+ wanted: usize,
+ output: &mut Vec<Member<T>>,
+ mut rng: impl Rng,
+ picker: F,
+ ) where
+ F: Fn(&T) -> bool,
+ {
+ // Basic reservoir sampling
+ let mut num_chosen = 0;
+ let mut num_seen = 0;
+
+ for member in self.iter_active() {
+ if !picker(member.id()) {
+ continue;
+ }
+
+ num_seen += 1;
+ if num_chosen < wanted {
+ num_chosen += 1;
+ output.push(member.clone());
+ } else {
+ let replace_at = rng.gen_range(0..num_seen);
+ if replace_at < wanted {
+ output[replace_at] = member.clone();
+ }
+ }
+ }
+ }
+
+ // Removes and yields the member only if it's known AND Down.
+ // swap_remove is fine: ordering is refreshed by next()'s shuffle
+ pub fn remove_if_down(&mut self, id: &T) -> Option<Member<T>> {
+ let position = self
+ .inner
+ .iter()
+ .position(|member| &member.id == id && member.state == State::Down);
+
+ position.map(|pos| self.inner.swap_remove(pos))
+ }
+
+ pub fn iter_active(&self) -> impl Iterator<Item = &Member<T>> {
+ self.inner.iter().filter(|m| m.is_active())
+ }
+
+ // Applies `update` to an already-known member when `condition`
+ // approves it. Yields None if the member is unknown; otherwise a
+ // summary of what (if anything) changed, with the num_active
+ // cache kept in sync.
+ pub fn apply_existing_if<F: Fn(&Member<T>) -> bool>(
+ &mut self,
+ update: Member<T>,
+ condition: F,
+ ) -> Option<ApplySummary> {
+ if let Some(known_member) = self
+ .inner
+ .iter_mut()
+ .find(|member| &member.id == update.id())
+ {
+ if !condition(known_member) {
+ return Some(ApplySummary {
+ is_active_now: known_member.is_active(),
+ apply_successful: false,
+ changed_active_set: false,
+ });
+ }
+ let was_active = known_member.is_active();
+ let apply_successful = known_member.change_state(update.incarnation(), update.state());
+ let is_active_now = known_member.is_active();
+ let changed_active_set = is_active_now != was_active;
+
+ if changed_active_set {
+ // XXX Overzealous checking
+ if is_active_now {
+ self.num_active = self.num_active.saturating_add(1);
+ } else {
+ self.num_active = self.num_active.saturating_sub(1);
+ }
+ }
+
+ Some(ApplySummary {
+ is_active_now,
+ apply_successful,
+ changed_active_set,
+ })
+ } else {
+ None
+ }
+ }
+
+ // Applies `update` unconditionally: existing members go through
+ // apply_existing_if; unknown ones get registered at a random slot
+ pub fn apply(&mut self, update: Member<T>, mut rng: impl Rng) -> ApplySummary {
+ self.apply_existing_if(update.clone(), |_member| true)
+ .unwrap_or_else(|| {
+ // Unknown member, we'll register it
+ let is_active_now = update.is_active();
+
+ // Insert at the end and swap with a random position.
+ self.inner.push(update);
+ let inserted_at = self.inner.len() - 1;
+
+ let swap_idx = (0..self.inner.len())
+ .choose(&mut rng)
+ .unwrap_or(inserted_at);
+
+ self.inner.swap(swap_idx, inserted_at);
+
+ if is_active_now {
+ self.num_active = self.num_active.saturating_add(1);
+ }
+
+ ApplySummary {
+ is_active_now,
+ apply_successful: true,
+ // Registering a new active member changes the active set
+ changed_active_set: is_active_now,
+ }
+ })
+ }
+}
+
/// The outcome of feeding a member update into `Members::apply`
/// or `Members::apply_existing_if`.
#[derive(Debug, Clone, PartialEq)]
#[must_use]
pub(crate) struct ApplySummary {
    // Whether the member counts as active after the update
    // (the tests show Suspect members count as active)
    pub is_active_now: bool,
    // Whether the update actually caused a state change
    pub apply_successful: bool,
    // Whether the set of active members grew or shrank
    pub changed_active_set: bool,
}
+
#[cfg(test)]
mod tests {

    use super::*;

    use alloc::vec;
    use rand::{rngs::SmallRng, SeedableRng};

    use State::*;

    #[test]
    fn alive_transitions() {
        let mut member = Member::new("a", 0, Alive);

        // Alive => Alive
        assert!(
            member.change_state(member.incarnation + 1, Alive),
            "can transition to a higher incarnation"
        );

        assert_eq!(1, member.incarnation);
        assert_eq!(Alive, member.state);

        assert!(
            !member.change_state(member.incarnation - 1, Alive),
            "cannot transition to a lower incarnation"
        );

        assert!(
            !member.change_state(member.incarnation, Alive),
            "cannot transition to same state and incarnation {:?}",
            &member
        );

        // Alive => Suspect
        assert!(
            !member.change_state(member.incarnation - 1, Suspect),
            "lower suspect incarnation shouldn't transition"
        );

        assert!(
            member.change_state(member.incarnation, Suspect),
            "transition to suspect with same incarnation"
        );
        assert_eq!(Suspect, member.state);

        member = Member::new("b", 0, Alive);
        assert!(
            member.change_state(member.incarnation + 1, Suspect),
            "transition to suspect with higher incarnation"
        );
        assert_eq!(1, member.incarnation);
        assert_eq!(Suspect, member.state);

        // Alive => Down, always works
        assert!(
            Member::new("c", 1, Alive).change_state(0, Down),
            "transitions to down on lower incarnation"
        );
        assert!(
            Member::new("c", 0, Alive).change_state(0, Down),
            "transitions to down on same incarnation"
        );
        assert!(
            Member::new("c", 0, Alive).change_state(1, Down),
            "transitions to down on higher incarnation"
        );
    }

    #[test]
    fn suspect_transitions() {
        let mut member = Member::new("a", 0, Suspect);

        // Suspect => Suspect
        assert!(
            member.change_state(member.incarnation + 1, Suspect),
            "can transition to a higher incarnation"
        );

        assert_eq!(1, member.incarnation);
        assert_eq!(Suspect, member.state);

        assert!(
            !member.change_state(member.incarnation - 1, Suspect),
            "cannot transition to a lower incarnation"
        );

        assert!(
            !member.change_state(member.incarnation, Suspect),
            "cannot transition to same state and incarnation {:?}",
            &member
        );

        // Suspect => Alive
        assert!(
            !member.change_state(member.incarnation - 1, Alive),
            "lower alive incarnation shouldn't transition"
        );
        assert!(
            !member.change_state(member.incarnation, Alive),
            "same alive incarnation shouldn't transition"
        );

        assert!(
            member.change_state(member.incarnation + 1, Alive),
            "can transition to alive with higher incarnation"
        );
        assert_eq!(Alive, member.state);

        // Suspect => Down, always works
        assert!(
            Member::new("c", 1, Suspect).change_state(0, Down),
            "transitions to down on lower incarnation"
        );
        assert!(
            Member::new("c", 0, Suspect).change_state(0, Down),
            "transitions to down on same incarnation"
        );
        assert!(
            Member::new("c", 0, Suspect).change_state(1, Down),
            "transitions to down on higher incarnation"
        );
    }

    #[test]
    fn down_never_transitions() {
        let mut member = Member::new("dead", 1, Down);

        for incarnation in 0..=2 {
            assert!(!member.change_state(incarnation, Alive));
            assert!(!member.change_state(incarnation, Suspect));
            assert!(!member.change_state(incarnation, Down));
        }
    }

    #[test]
    fn next_walks_sequentially_then_shuffles() {
        let ordered_ids = vec![1, 2, 3, 4, 5];
        let mut members = Members::new(ordered_ids.iter().cloned().map(Member::alive).collect());

        let mut rng = SmallRng::seed_from_u64(0xF0CA);

        for wanted in ordered_ids.iter().cloned() {
            let got = members
                .next(&mut rng)
                .expect("Non-empty set of Alive members should always yield Some()")
                .id;
            assert_eq!(wanted, got);
        }

        // By now we walked through all known live members so
        // the internal state should've shuffled.
        // We'll verify that by calling `next()` multiple
        // times and comparing with the original `ordered_ids`
        let mut after_shuffle = (0..ordered_ids.len())
            .map(|_| members.next(&mut rng).unwrap().id)
            .collect::<Vec<_>>();
        assert_ne!(ordered_ids, after_shuffle);

        // The shuffle only happens once the cursor walks
        // through the whole set, so `after_shuffle` should
        // contain every member, like `ordered_ids`, but in
        // a distinct order
        after_shuffle.sort_unstable();
        assert_eq!(ordered_ids, after_shuffle);
    }

    #[test]
    fn apply_existing_if_behaviour() {
        let mut members = Members::new(Vec::new());

        assert_eq!(
            None,
            members.apply_existing_if(Member::alive(1), |_member| true),
            "Only yields None if the member is not found"
        );

        let mut rng = SmallRng::seed_from_u64(0xF0CA);
        let _ = members.apply(Member::alive(1), &mut rng);

        assert_ne!(
            None,
            members.apply_existing_if(Member::alive(1), |_member| true),
            "Must yield Some() if existing, regardless of condition"
        );

        assert_ne!(
            None,
            members.apply_existing_if(Member::alive(1), |_member| false),
            "Must yield Some() if existing, regardless of condition"
        );
    }

    #[test]
    fn apply_summary_behaviour() {
        let mut members = Members::new(Vec::new());
        let mut rng = SmallRng::seed_from_u64(0xF0CA);

        // New and active member
        let res = members.apply(Member::suspect(1), &mut rng);
        assert_eq!(
            ApplySummary {
                is_active_now: true,
                apply_successful: true,
                changed_active_set: true
            },
            res,
        );
        assert_eq!(1, members.len());
        assert_eq!(1, members.num_active());

        // Failed attempt to change member id=1 to alive
        // (since it's already suspect with same incarnation)
        let res = members.apply(Member::alive(1), &mut rng);
        assert_eq!(
            ApplySummary {
                is_active_now: true,
                apply_successful: false,
                changed_active_set: false
            },
            res,
        );
        assert_eq!(1, members.len());

        // Successful attempt at changing member id=1 to
        // alive by using a higher incarnation
        let res = members.apply(Member::new(1, 1, State::Alive), &mut rng);
        assert_eq!(
            ApplySummary {
                is_active_now: true,
                apply_successful: true,
                changed_active_set: false
            },
            res,
        );
        assert_eq!(1, members.len());

        // Change existing member to down
        let res = members.apply(Member::down(1), &mut rng);
        assert_eq!(
            ApplySummary {
                is_active_now: false,
                apply_successful: true,
                changed_active_set: true
            },
            res,
        );
        assert_eq!(1, members.len());
        assert_eq!(0, members.num_active());

        // New and inactive member
        let res = members.apply(Member::down(2), &mut rng);
        assert_eq!(
            ApplySummary {
                is_active_now: false,
                apply_successful: true,
                changed_active_set: false
            },
            res,
        );
        assert_eq!(2, members.len());
        assert_eq!(0, members.num_active());
    }

    #[test]
    fn remove_if_down_works() {
        let mut members = Members::new(Vec::new());
        let mut rng = SmallRng::seed_from_u64(0xF0CA);

        assert_eq!(
            None,
            members.remove_if_down(&1),
            "can't remove a member that does not exist"
        );
        let _ = members.apply(Member::alive(1), &mut rng);

        assert_eq!(
            None,
            members.remove_if_down(&1),
            "can't remove a member that isn't down"
        );
        let _ = members.apply(Member::down(1), &mut rng);

        assert_eq!(
            Some(Member::down(1)),
            members.remove_if_down(&1),
            "must return the removed member"
        );
    }

    #[test]
    fn next_yields_none_with_no_active_members() {
        let mut members = Members::new(Vec::new());
        let mut rng = SmallRng::seed_from_u64(0xF0CA);

        assert_eq!(
            None,
            members.next(&mut rng),
            "next() should yield None when there are no members"
        );

        let _ = members.apply(Member::down(-1), &mut rng);
        let _ = members.apply(Member::down(-2), &mut rng);
        let _ = members.apply(Member::down(-3), &mut rng);

        assert_eq!(
            None,
            members.next(&mut rng),
            "next() should yield None when there are no active members"
        );

        let _ = members.apply(Member::alive(1), &mut rng);

        for _i in 0..10 {
            assert_eq!(
                Some(1),
                members.next(&mut rng).map(|m| m.id),
                "next() should yield the same member if it's the only active"
            )
        }
    }

    #[test]
    fn choose_active_members_behaviour() {
        let mut members = Members::new(Vec::from([
            // 5 active members
            Member::alive(1),
            Member::alive(2),
            Member::alive(3),
            Member::suspect(4),
            Member::suspect(5),
            // 2 down
            Member::down(6),
            Member::down(7),
        ]));

        assert_eq!(7, members.len());
        assert_eq!(5, members.num_active());

        let mut out = Vec::new();
        let mut rng = SmallRng::seed_from_u64(0xF0CA);

        out.clear();
        members.choose_active_members(0, &mut out, &mut rng, |_| true);
        assert_eq!(0, out.len(), "Can pointlessly choose 0 members");

        out.clear();
        members.choose_active_members(10, &mut out, &mut rng, |_| false);
        assert_eq!(0, out.len(), "Filtering works");

        out.clear();
        members.choose_active_members(members.len(), &mut out, &mut rng, |_| true);
        assert_eq!(
            members.num_active(),
            out.len(),
            "Only chooses active members"
        );

        out.clear();
        members.choose_active_members(2, &mut out, &mut rng, |_| true);
        assert_eq!(2, out.len(), "Respects `wanted` even if we have more");

        out.clear();
        members.choose_active_members(usize::MAX, &mut out, &mut rng, |&member_id| member_id > 4);
        assert_eq!(vec![Member::suspect(5)], out);
    }
}
Created src/payload.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use crate::Incarnation;
+
/// The preamble of every datagram sent by Foca.
///
/// A foca packet is always:
///
/// - A Header. Optionally followed by:
/// - A `u16` in network byte order to signal how many updates are
///   expected;
/// - A sequence of that many updates (`foca::Member`);
/// - And finally a tail of custom broadcasts, if at all used.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Header<T> {
    /// The identity of the sender
    pub src: T,
    /// The sender's incarnation, so that other cluster members
    /// may keep it up-to-date
    pub src_incarnation: Incarnation,
    /// The target of the message
    pub dst: T,
    /// The actual message
    pub message: Message<T>,
}
+
/// Messages are how members request interaction from each other.
///
/// There are a few different kind of interactions that may occur:
///
/// ## Direct Probe Cycle
///
/// Foca will periodically check if members are still active. It
/// sends a `Ping` to said member and expects an `Ack` in return.
///
/// If the probed member (`B` below) takes too long to reply with
/// an Ack, the indirect probe cycle starts.
///
/// ## Indirect Probe Cycle
///
/// A previously pinged member may be too busy, its reply may have been
/// dropped by an unreliable network or maybe it's actually down.
///
/// The indirect probe cycle helps with getting more certainty about
/// its current state by asking other members to execute a ping
/// on our behalf.
///
/// Here, member `A` will ask member `C` to ping `B` on their
/// behalf:
///
/// ~~~txt
/// A ->[PingReq(B)] C
/// C ->[IndirectPing(A)] B
/// B ->[IndirectAck(A)] C
/// C ->[ForwardedAck(B)] A
/// ~~~
///
/// If by the end of the full probe cycle (direct and indirect) Foca
/// has received either an `Ack` or a `ForwardedAck`, the member is
/// considered active. Otherwise the member is declared `State::Suspect`
/// and will need to refute it before the configured deadline
/// else it will be declared `State::Down`.
///
/// ## "Join" sub-protocol
///
/// Foca instances can join a cluster by sending `Announce` messages
/// to one or more identities. If a recipient decides to accept it,
/// it replies with a `Feed` message, containing other active cluster
/// members.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Message<T> {
    /// A Ping message. Replied with `Ack`.
    Ping(ProbeNumber),
    /// Acknowledgment of a Ping. Reply to `Ping`.
    Ack(ProbeNumber),

    /// Receiver is being asked to Ping `target` on their behalf
    /// and should emit `IndirectPing(sender)` to `target`
    PingReq {
        /// The identity that failed to reply to the original
        /// `Ping` in a timely manner.
        target: T,
        /// See `foca::ProbeNumber`
        probe_number: ProbeNumber,
    },

    /// Analogous to `Ping`, with added metadata about the original
    /// requesting member.
    /// Recipient should reply `IndirectAck(origin)` to sender
    IndirectPing {
        /// The identity that started the indirect cycle. I.e.:
        /// whoever sent the unanswered `Ping`.
        origin: T,
        /// See `foca::ProbeNumber`
        probe_number: ProbeNumber,
    },

    /// Analogous to `Ack`, with added metadata about the final
    /// destination.
    /// Recipient should emit `ForwardedAck(sender)` to `target`
    IndirectAck {
        /// The identity that started the indirect cycle. I.e.:
        /// whoever sent the unanswered `Ping`.
        target: T,
        /// See `foca::ProbeNumber`
        probe_number: ProbeNumber,
    },

    /// The result of a successful indirect probe cycle. Sender
    /// is indicating that they've managed to ping and receive
    /// an ack from `origin`
    ForwardedAck {
        /// The identity that failed to reply to the original
        /// `Ping` in a timely manner.
        origin: T,
        /// See `foca::ProbeNumber`
        probe_number: ProbeNumber,
    },

    /// Request to join a cluster. Replied with `Feed`.
    Announce,
    /// Response to an Announce, signals that the remaining bytes in the
    /// payload will be a sequence of active members, instead of just
    /// cluster updates. Reply to `Announce`.
    Feed,

    /// Deliberate dissemination of cluster updates.
    /// Non-interactive, doesn't expect a reply.
    Gossip,
}
+
/// ProbeNumber is simply a bookkeeping mechanism to try and prevent
/// incorrect sequencing of protocol messages.
///
/// Only equality matters; the value wraps around on overflow.
///
/// Similar in spirit to `foca::TimerToken`.
pub type ProbeNumber = u8;
Created src/probe.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+extern crate alloc;
+use alloc::vec::Vec;
+
+use crate::{member::Member, ProbeNumber};
+
// FIXME This whole thing is ugly AF :(

/// Bookkeeping for a single probe cycle: the direct ping and the
/// indirect (PingReq-based) fallback.
pub struct Probe<T> {
    // The member currently being probed; None when idle
    direct: Option<Member<T>>,
    // Members asked to ping `direct` on our behalf whose indirect
    // acks are still pending
    indirect: Vec<T>,
    // Matches acks to this cycle; wraps, never reset by clear()
    probe_number: ProbeNumber,

    // Whether the probed member acked directly this cycle
    direct_ack_ok: bool,
    // How many indirect acks arrived this cycle
    indirect_ack_count: usize,

    // Set once the cycle advanced to the indirect stage; checked
    // by validate()
    reached_indirect_probe_stage: bool,
}
+
+impl<T: Clone + PartialEq> Probe<T> {
+ pub fn new(indirect: Vec<T>) -> Self {
+ Self {
+ indirect,
+ direct: None,
+ direct_ack_ok: false,
+ indirect_ack_count: 0,
+ reached_indirect_probe_stage: false,
+ probe_number: ProbeNumber::default(),
+ }
+ }
+
+ #[must_use]
+ pub fn start(&mut self, target: Member<T>) -> ProbeNumber {
+ self.clear();
+ self.direct = Some(target);
+ self.probe_number = self.probe_number.wrapping_add(1);
+ self.probe_number
+ }
+
+ pub(crate) fn probe_number(&self) -> ProbeNumber {
+ self.probe_number
+ }
+
+ pub fn clear(&mut self) {
+ self.direct = None;
+ self.indirect.clear();
+ self.direct_ack_ok = false;
+ self.indirect_ack_count = 0;
+ self.reached_indirect_probe_stage = false;
+ // do NOT reset probe_number
+ }
+
+ pub fn mark_indirect_probe_stage_reached(&mut self) {
+ self.reached_indirect_probe_stage = true;
+ }
+
+ pub fn validate(&self) -> bool {
+ // A probe that hasn't been started is
+ // valid
+ self.direct.is_none()
+ // Otherwise it's only valid if the indirect
+ // probing stage has been reached
+ || self.reached_indirect_probe_stage
+ }
+
+ pub fn take_failed(&mut self) -> Option<Member<T>> {
+ if !self.succeeded() {
+ self.direct.take()
+ } else {
+ None
+ }
+ }
+
+ pub fn target(&self) -> Option<&T> {
+ self.direct.as_ref().map(|probed| probed.id())
+ }
+
+ pub fn is_probing(&self, id: &T) -> bool {
+ self.direct
+ .as_ref()
+ .map(|probed| probed.id() == id)
+ .unwrap_or(false)
+ }
+
+ pub fn succeeded(&self) -> bool {
+ self.direct_ack_ok || self.indirect_ack_count > 0
+ }
+
+ pub fn receive_ack(&mut self, from: &T, probeno: ProbeNumber) -> bool {
+ if probeno == self.probe_number
+ && self
+ .direct
+ .as_ref()
+ .map(|direct| direct.id() == from)
+ .unwrap_or(false)
+ {
+ self.direct_ack_ok = true;
+ true
+ } else {
+ false
+ }
+ }
+
+ pub fn expect_indirect_ack(&mut self, from: T) {
+ debug_assert!(self
+ .direct
+ .as_ref()
+ .map(|probed| probed.id() != &from)
+ .unwrap_or(false));
+ self.indirect.push(from);
+ }
+
+ pub fn receive_indirect_ack(&mut self, from: &T, probeno: ProbeNumber) -> bool {
+ if self.probe_number != probeno {
+ return false;
+ }
+
+ if let Some(position) = self.indirect.iter().position(|id| id == from) {
+ self.indirect_ack_count += 1;
+ // Ensure we can't double count the same candidate
+ self.indirect.swap_remove(position);
+ true
+ } else {
+ false
+ }
+ }
+}
Created src/runtime.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use core::time::Duration;
+
+use crate::{Identity, Incarnation};
+
+/// A Runtime is Foca's gateway to the real world: here is where
+/// implementations decide how to interact with the network, the
+/// hardware timer and the user.
+///
+/// Implementations may react directly to it for a fully synchronous
+/// behavior or accumulate-then-drain when dispatching via fancier
+/// mechanisms like async.
/// A Runtime is Foca's gateway to the real world: here is where
/// implementations decide how to interact with the network, the
/// hardware timer and the user.
///
/// Implementations may react directly to it for a fully synchronous
/// behavior or accumulate-then-drain when dispatching via fancier
/// mechanisms like async.
pub trait Runtime<T: Identity> {
    /// Whenever something changes Foca's state significantly a
    /// notification is emitted.
    ///
    /// It's the best mechanism to watch for membership changes
    /// and allows implementors to keep track of the cluster state
    /// without having direct access to the running Foca instance.
    ///
    /// Implementations may completely disregard this if desired.
    fn notify(&mut self, notification: Notification<T>);

    /// This is how Foca connects to an actual transport.
    ///
    /// Implementations are responsible for the actual delivery,
    /// including any buffering or copying of `data` needed to
    /// outlive this call.
    fn send_to(&mut self, to: T, data: &[u8]);

    /// Request to schedule the delivery of a given event after
    /// a specified duration.
    ///
    /// Implementations MUST ensure that every event is delivered.
    /// Foca is very tolerant to delays, but non-delivery will
    /// cause errors.
    fn submit_after(&mut self, event: Timer<T>, after: Duration);
}
+
+// A mutable reference to a Runtime is a Runtime too
+impl<T, R> Runtime<T> for &mut R
+where
+ T: Identity,
+ R: Runtime<T>,
+{
+ fn notify(&mut self, notification: Notification<T>) {
+ R::notify(self, notification)
+ }
+
+ fn send_to(&mut self, to: T, data: &[u8]) {
+ R::send_to(self, to, data)
+ }
+
+ fn submit_after(&mut self, event: Timer<T>, after: Duration) {
+ R::submit_after(self, event, after)
+ }
+}
+
/// A Notification contains information about high-level relevant
/// state changes in the cluster or Foca itself.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Notification<T> {
    /// Foca discovered a new active member with identity T.
    MemberUp(T),
    /// A previously active member has been declared down by the cluster.
    ///
    /// If Foca detects a down member but didn't know about its activity
    /// before, this notification will not be emitted.
    ///
    /// Can only happen if `MemberUp(T)` happened before.
    MemberDown(T),

    /// Foca's current identity is known by at least one active member
    /// of the cluster.
    ///
    /// Fired when successfully joining a cluster for the first time and
    /// every time after a successful identity change.
    Active,

    /// All known active members have either left the cluster or been
    /// declared down.
    Idle,

    /// Foca's current identity has been declared down.
    ///
    /// Manual intervention via `Foca::change_identity` or
    /// `Foca::reuse_down_identity` is required to return to a functioning
    /// state.
    Defunct,

    /// Foca automatically changed its identity and rejoined the cluster
    /// after being declared down.
    ///
    /// This happens instead of `Defunct` when identities opt-in on
    /// `Identity::renew()` functionality. The payload is the new
    /// identity now in use.
    Rejoin(T),
}
+
/// Timer is an event that's scheduled by a [`Runtime`]. You won't need
/// to construct or understand these, just ensure a timely delivery.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Timer<T> {
    /// Pick a random active member and initiate the probe cycle.
    ProbeRandomMember(TimerToken),

    /// Send indirect probes if the direct one hasn't completed yet.
    SendIndirectProbe {
        /// The current member being probed
        probed_id: T,
        /// See `TimerToken`
        token: TimerToken,
    },

    /// Transitions member T from Suspect to Down if the incarnation is
    /// still the same.
    ChangeSuspectToDown {
        /// Target member identity
        member_id: T,
        /// Its Incarnation the moment the suspicion was raised. If the
        /// member refutes the suspicion (by increasing its Incarnation),
        /// this won't match and it won't be declared Down.
        incarnation: Incarnation,
        /// See `TimerToken`
        token: TimerToken,
    },

    /// Forgets about dead member `T`, allowing them to join the
    /// cluster again with the same identity.
    RemoveDown(T),
}
+
/// TimerToken is simply a bookkeeping mechanism to try and prevent
/// reacting to events dispatched that aren't relevant anymore.
///
/// Certain interactions may cause Foca to decide to disregard every
/// event it scheduled previously- so it changes the token in order
/// to drop everything that doesn't match.
///
/// Similar in spirit to [`crate::ProbeNumber`].
pub type TimerToken = u8;
Created src/testing.rs
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+use alloc::vec::Vec;
+use core::time::Duration;
+
+use bytes::{Buf, BufMut, Bytes};
+
+use crate::{Codec, Header, Identity, Member, Message, Notification, Runtime, State, Timer};
+
/// Test identity: `id` names a member, `bump` disambiguates
/// successive incarnations of the same `id` (see `Identity::renew`).
#[derive(Debug, Clone, Copy, PartialOrd, Ord)]
pub struct ID {
    // The stable member name
    id: u8,
    // Incremented (wrapping) by renew() to produce a fresh identity
    bump: u8,
    // Local-only flag: when set, renew() yields a successor.
    // Deliberately excluded from PartialEq.
    rejoinable: bool,
}
+
+impl PartialEq for ID {
+ fn eq(&self, other: &Self) -> bool {
+ // Ignoring `rejoinable` field
+ self.id == other.id && self.bump == other.bump
+ }
+}
+
+impl Eq for ID {}
+
+impl ID {
+ pub fn new(id: u8) -> Self {
+ ID::new_with_bump(id, 0)
+ }
+
+ pub fn new_with_bump(id: u8, bump: u8) -> Self {
+ Self {
+ id,
+ bump,
+ rejoinable: false,
+ }
+ }
+
+ pub fn rejoinable(mut self) -> Self {
+ self.rejoinable = true;
+ self
+ }
+
+ pub fn serialize_into(&self, mut buf: impl BufMut) -> Result<(), BadCodecError> {
+ if buf.remaining_mut() >= 2 {
+ buf.put_u8(self.id);
+ buf.put_u8(self.bump);
+ Ok(())
+ } else {
+ Err(BadCodecError::SerializeInto)
+ }
+ }
+
+ pub fn deserialize_from(mut buf: impl Buf) -> Result<Self, BadCodecError> {
+ if buf.remaining() >= 2 {
+ Ok(Self {
+ id: buf.get_u8(),
+ bump: buf.get_u8(),
+ // Only the identity held by foca cares about this
+ rejoinable: false,
+ })
+ } else {
+ Err(BadCodecError::DeserializeFrom)
+ }
+ }
+}
+
+impl Identity for ID {
+ fn has_same_prefix(&self, other: &Self) -> bool {
+ self.id == other.id
+ }
+
+ fn renew(&self) -> Option<Self> {
+ if self.rejoinable {
+ Some(ID::new_with_bump(self.id, self.bump.wrapping_add(1)).rejoinable())
+ } else {
+ None
+ }
+ }
+}
+
/// A deliberately simplistic hand-rolled binary codec used by the
/// test-suite.
pub struct BadCodec;

/// Everything that can go wrong while using `BadCodec`.
#[derive(Debug, PartialEq)]
pub enum BadCodecError {
    BufTooSmall,
    // Unknown message discriminant byte on the wire
    BadMessageID(u8),
    // Unknown `State` discriminant byte on the wire
    BadStateByte(u8),
    SerializeInto,
    DeserializeFrom,
    EncodeHeader,
    DecodeHeader,
    DecodeMessage,
}
+
+impl core::fmt::Display for BadCodecError {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.write_fmt(format_args!("{:?}", self))
+ }
+}
+
impl BadCodec {
    // Wire layout: src ID (2 bytes) + incarnation (u16, network
    // order) + dst ID (2 bytes) + message (1+ bytes).
    fn encode_header(
        &self,
        header: &Header<ID>,
        mut buf: impl BufMut,
    ) -> Result<(), BadCodecError> {
        // 2 (src ID) + 2 (incarnation) + 2 (dst ID); the message
        // tail does its own capacity checking
        if buf.remaining_mut() >= 2 + 2 + 2 {
            header.src.serialize_into(&mut buf)?;
            buf.put_u16(header.src_incarnation);
            header.dst.serialize_into(&mut buf)?;
            self.encode_message(&header.message, &mut buf)?;
            Ok(())
        } else {
            Err(BadCodecError::EncodeHeader)
        }
    }

    fn decode_header(&self, mut buf: impl Buf) -> Result<Header<ID>, BadCodecError> {
        // Strict `>` (unlike encode's `>=`): the 6 fixed bytes must
        // be followed by at least one message byte.
        // NOTE(review): asymmetry with encode_header looks deliberate
        // but is undocumented — confirm before relying on it
        if buf.remaining() > 2 + 2 + 2 {
            let src = ID::deserialize_from(&mut buf)?;
            let src_incarnation = buf.get_u16();
            let dst = ID::deserialize_from(&mut buf)?;
            let message = self.decode_message(&mut buf)?;

            Ok(Header {
                src,
                src_incarnation,
                dst,
                message,
            })
        } else {
            Err(BadCodecError::DecodeHeader)
        }
    }

    fn decode_message(&self, mut buf: impl Buf) -> Result<Message<ID>, BadCodecError> {
        if !buf.has_remaining() {
            return Err(BadCodecError::BufTooSmall);
        }

        let message_id = buf.get_u8();

        // Message ids below 7 carry a payload, so at least one more
        // byte must follow the discriminant
        if message_id < 7 && !buf.has_remaining() {
            return Err(BadCodecError::DecodeMessage);
        }

        let message = match message_id {
            1 => Message::Ping(buf.get_u8()),
            2 => Message::Ack(buf.get_u8()),
            3 => {
                let target = ID::deserialize_from(&mut buf)?;
                let probe_number = buf.get_u8();
                Message::PingReq {
                    target,
                    probe_number,
                }
            }
            4 => {
                let origin = ID::deserialize_from(&mut buf)?;
                let probe_number = buf.get_u8();
                Message::IndirectPing {
                    origin,
                    probe_number,
                }
            }
            5 => {
                let target = ID::deserialize_from(&mut buf)?;
                let probe_number = buf.get_u8();
                Message::IndirectAck {
                    target,
                    probe_number,
                }
            }
            6 => {
                let origin = ID::deserialize_from(&mut buf)?;
                let probe_number = buf.get_u8();
                Message::ForwardedAck {
                    origin,
                    probe_number,
                }
            }
            7 => Message::Gossip,
            8 => Message::Announce,
            9 => Message::Feed,
            other => return Err(BadCodecError::BadMessageID(other)),
        };

        Ok(message)
    }

    // Inverse of decode_message: one discriminant byte, then the
    // variant payload (if any)
    fn encode_message(
        &self,
        message: &Message<ID>,
        mut buf: impl BufMut,
    ) -> Result<(), BadCodecError> {
        if buf.remaining_mut() < 2 {
            return Err(BadCodecError::BufTooSmall);
        }
        match message {
            Message::Ping(ping_nr) => {
                buf.put_u8(1);
                buf.put_u8(*ping_nr);
            }
            Message::Ack(ping_nr) => {
                buf.put_u8(2);
                buf.put_u8(*ping_nr);
            }
            Message::PingReq {
                target,
                probe_number,
            } => {
                buf.put_u8(3);
                target.serialize_into(&mut buf)?;
                buf.put_u8(*probe_number);
            }
            Message::IndirectPing {
                origin,
                probe_number,
            } => {
                buf.put_u8(4);
                origin.serialize_into(&mut buf)?;
                buf.put_u8(*probe_number);
            }
            Message::IndirectAck {
                target,
                probe_number,
            } => {
                buf.put_u8(5);
                target.serialize_into(&mut buf)?;
                buf.put_u8(*probe_number);
            }
            Message::ForwardedAck {
                origin,
                probe_number,
            } => {
                buf.put_u8(6);
                origin.serialize_into(&mut buf)?;
                buf.put_u8(*probe_number);
            }
            Message::Gossip => {
                buf.put_u8(7);
            }
            Message::Announce => {
                buf.put_u8(8);
            }
            Message::Feed => {
                buf.put_u8(9);
            }
        }

        Ok(())
    }

    fn decode_member(&self, mut buf: impl Buf) -> Result<Member<ID>, BadCodecError> {
        let id = ID::deserialize_from(&mut buf)?;
        let incarnation = buf.get_u16();
        let state = match buf.get_u8() {
            1 => State::Alive,
            2 => State::Suspect,
            3 => State::Down,
            other => return Err(BadCodecError::BadStateByte(other)),
        };

        Ok(Member::new(id, incarnation, state))
    }

    fn encode_member(
        &self,
        member: &Member<ID>,
        mut buf: impl BufMut,
    ) -> Result<(), BadCodecError> {
        // threshold = 2 (ID) + 2 (incarnation) + 1 (state) = 5
        if buf.remaining_mut() >= 5 {
            member.id().serialize_into(&mut buf)?;
            buf.put_u16(member.incarnation());
            match member.state() {
                State::Alive => buf.put_u8(1),
                State::Suspect => buf.put_u8(2),
                State::Down => buf.put_u8(3),
            }
            Ok(())
        } else {
            Err(BadCodecError::BufTooSmall)
        }
    }
}
+
+// More like PlzDontFuzzMeCodec amirite
+impl Codec<ID> for BadCodec {
+ type Error = BadCodecError;
+
+ fn encode_header(
+ &mut self,
+ header: &Header<ID>,
+ mut buf: impl BufMut,
+ ) -> Result<(), Self::Error> {
+ BadCodec::encode_header(self, header, &mut buf)?;
+ Ok(())
+ }
+
+ fn decode_header(&mut self, mut buf: impl Buf) -> Result<Header<ID>, Self::Error> {
+ BadCodec::decode_header(self, &mut buf)
+ }
+
+ fn encode_member(
+ &mut self,
+ member: &Member<ID>,
+ mut buf: impl BufMut,
+ ) -> Result<(), Self::Error> {
+ BadCodec::encode_member(self, member, &mut buf)
+ }
+
+ fn decode_member(&mut self, mut buf: impl Buf) -> Result<Member<ID>, Self::Error> {
+ BadCodec::decode_member(self, &mut buf)
+ }
+}
+
/// A `Runtime` that accumulates everything Foca asks of it so tests
/// can inspect and drain the results afterwards.
#[derive(Debug, Clone)]
pub struct InMemoryRuntime {
    // Notifications received via Runtime::notify
    notifications: Vec<Notification<ID>>,
    // (destination, payload) pairs received via Runtime::send_to
    to_send: Vec<(ID, Bytes)>,
    // (event, delay) pairs received via Runtime::submit_after
    to_schedule: Vec<(Timer<ID>, Duration)>,
}
+
+impl InMemoryRuntime {
+ pub fn new() -> Self {
+ Self {
+ notifications: Vec::new(),
+ to_send: Vec::new(),
+ to_schedule: Vec::new(),
+ }
+ }
+
+ pub fn clear(&mut self) {
+ self.notifications.clear();
+ self.to_send.clear();
+ self.to_schedule.clear();
+ }
+
+ pub fn take_all_data(&mut self) -> Vec<(ID, Bytes)> {
+ core::mem::take(&mut self.to_send)
+ }
+
+ pub fn take_data(&mut self, dst: ID) -> Option<Bytes> {
+ let position = self.to_send.iter().position(|(to, _data)| to == &dst)?;
+
+ let taken = self.to_send.swap_remove(position);
+ Some(taken.1)
+ }
+
+ pub fn take_notification(&mut self, wanted: Notification<ID>) -> Option<Notification<ID>> {
+ let position = self
+ .notifications
+ .iter()
+ .position(|notification| notification == &wanted)?;
+
+ let taken = self.notifications.swap_remove(position);
+ Some(taken)
+ }
+
+ pub fn take_scheduling(&mut self, timer: Timer<ID>) -> Option<Duration> {
+ let position = self
+ .to_schedule
+ .iter()
+ .position(|(event, _when)| event == &timer)?;
+
+ let taken = self.to_schedule.swap_remove(position);
+ Some(taken.1)
+ }
+}
+
+impl Runtime<ID> for InMemoryRuntime {
+ fn notify(&mut self, notification: Notification<ID>) {
+ self.notifications.push(notification);
+ }
+
+ fn send_to(&mut self, to: ID, data: &[u8]) {
+ self.to_send.push((to, Bytes::copy_from_slice(data)));
+ }
+
+ fn submit_after(&mut self, event: Timer<ID>, after: Duration) {
+ self.to_schedule.push((event, after));
+ }
+}
+
/// Minimal serde-friendly identity used by the codec roundtrip helper.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TrivialID(u64);
+
// Shared harness for the feature-gated codecs: encodes a header and
// a member, decodes them back, and asserts equality.
#[cfg(any(feature = "bincode-codec", feature = "postcard-codec"))]
pub fn verify_codec_roundtrip<C: Codec<TrivialID>>(mut codec: C) -> Result<(), C::Error> {
    let mut buf = Vec::new();

    // Header with a payload-carrying message variant
    let payload = Header {
        src: TrivialID(1),
        src_incarnation: 0,
        dst: TrivialID(2),
        message: Message::PingReq {
            target: TrivialID(3),
            probe_number: 1,
        },
    };

    codec.encode_header(&payload, &mut buf)?;
    let decoded = codec.decode_header(&buf[..])?;

    assert_eq!(payload, decoded);

    // Reuse the buffer for the member roundtrip
    buf.clear();
    let member = Member::new(TrivialID(42), 12, State::Down);

    codec.encode_member(&member, &mut buf)?;
    let decoded = codec.decode_member(&buf[..])?;

    assert_eq!(member, decoded);

    Ok(())
}
+
// Gate the test module out of non-test builds: without `#[cfg(test)]`,
// `use super::*;` is compiled into regular builds and produces
// unused-import warnings (the `#[test]` fns themselves are dropped).
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `Message` variant must survive an encode/decode
    /// round-trip through `BadCodec`.
    #[test]
    fn message_roundtrip() {
        let messages = [
            Message::Ping(1),
            Message::Ack(2),
            Message::PingReq {
                target: ID::new(3),
                probe_number: 4,
            },
            Message::IndirectPing {
                origin: ID::new_with_bump(5, 6),
                probe_number: 7,
            },
            Message::IndirectAck {
                target: ID::new_with_bump(8, 9),
                probe_number: 10,
            },
            Message::ForwardedAck {
                origin: ID::new_with_bump(11, 12),
                probe_number: 13,
            },
            Message::Gossip,
            Message::Announce,
            Message::Feed,
        ];

        let codec = BadCodec;
        // MTU-sized scratch buffer; encoding writes into a fixed slice.
        let mut buf = alloc::vec![0; 1500];

        for msg in messages.iter() {
            codec.encode_message(msg, &mut buf[..]).unwrap();
            let decoded = codec.decode_message(&buf[..]).unwrap();
            assert_eq!(msg, &decoded);
        }
    }

    /// A full `Header` (including a nested message) must round-trip
    /// losslessly through `BadCodec`.
    #[test]
    fn header_roundtrip() {
        let header = Header {
            src: ID::new(0),
            src_incarnation: 710,
            dst: ID::new_with_bump(1, 2),
            message: Message::ForwardedAck {
                origin: ID::new_with_bump(2, 254),
                probe_number: 11,
            },
        };

        let codec = BadCodec;
        let mut buf = Vec::new();

        codec.encode_header(&header, &mut buf).unwrap();
        let decoded = codec.decode_header(&buf[..]).unwrap();

        assert_eq!(header, decoded);
    }

    /// `Member` must round-trip through `BadCodec` for every `State`.
    #[test]
    fn member_roundtrip() {
        for state in [State::Alive, State::Suspect, State::Down] {
            let member = Member::new(ID::new_with_bump(7, 13), 420, state);
            let codec = BadCodec;
            let mut buf = Vec::new();

            codec.encode_member(&member, &mut buf).unwrap();
            let decoded = codec.decode_member(&buf[..]).unwrap();

            assert_eq!(member, decoded);
        }
    }
}