//! agreement_tests.rs — Raft log-agreement integration tests (Lab 2B).
  1. #[macro_use]
  2. extern crate anyhow;
  3. extern crate bytes;
  4. extern crate labrpc;
  5. extern crate ruaft;
  6. use rand::{thread_rng, Rng};
  7. mod config;
  8. #[test]
  9. fn basic_agreement() -> config::Result<()> {
  10. const SERVERS: usize = 5;
  11. let cfg = config::make_config(SERVERS, false);
  12. let _guard = cfg.deferred_cleanup();
  13. cfg.begin("Test (2B): basic agreement");
  14. for index in 1..4 {
  15. let committed = cfg.committed_count(index)?;
  16. assert_eq!(0, committed.0, "some have committed before start()");
  17. let commit_index = cfg.one(index as i32 * 100, SERVERS, false)?;
  18. assert_eq!(
  19. index, commit_index,
  20. "got index {} but expected {}",
  21. commit_index, index
  22. );
  23. }
  24. cfg.end();
  25. Ok(())
  26. }
  27. #[test]
  28. fn fail_agree() -> config::Result<()> {
  29. const SERVERS: usize = 3;
  30. let cfg = config::make_config(SERVERS, false);
  31. let _guard = cfg.deferred_cleanup();
  32. cfg.begin("Test (2B): agreement despite follower disconnection");
  33. cfg.one(101, SERVERS, false)?;
  34. // follower network disconnection
  35. let leader = cfg.check_one_leader()?;
  36. cfg.disconnect((leader + 1) % SERVERS);
  37. // agree despite one disconnected server?
  38. cfg.one(102, SERVERS - 1, false)?;
  39. cfg.one(103, SERVERS - 1, false)?;
  40. config::sleep_election_timeouts(1);
  41. cfg.one(104, SERVERS - 1, false)?;
  42. cfg.one(105, SERVERS - 1, false)?;
  43. // re-connect
  44. cfg.connect((leader + 1) % SERVERS);
  45. // agree with full set of servers?
  46. cfg.one(106, SERVERS, true)?;
  47. config::sleep_election_timeouts(1);
  48. cfg.one(107, SERVERS, true)?;
  49. cfg.end();
  50. Ok(())
  51. }
  52. #[test]
  53. fn fail_no_agree() -> config::Result<()> {
  54. const SERVERS: usize = 5;
  55. let cfg = config::make_config(SERVERS, false);
  56. let _guard = cfg.deferred_cleanup();
  57. cfg.begin("Test (2B): no agreement if too many followers disconnect");
  58. cfg.one(10, SERVERS, false)?;
  59. // 3 of 5 followers disconnect
  60. let leader = cfg.check_one_leader()?;
  61. cfg.disconnect((leader + 1) % SERVERS);
  62. cfg.disconnect((leader + 2) % SERVERS);
  63. cfg.disconnect((leader + 3) % SERVERS);
  64. let result = cfg.leader_start(leader, 20);
  65. assert!(result.is_some(), "leader rejected start()");
  66. let index = result.unwrap().1;
  67. assert_eq!(2, index, "expected index 2, got {}", index);
  68. config::sleep_election_timeouts(2);
  69. let (commit_count, _) = cfg.committed_count(index)?;
  70. assert_eq!(
  71. 0, commit_count,
  72. "{} committed but no majority",
  73. commit_count
  74. );
  75. // repair
  76. cfg.connect((leader + 1) % SERVERS);
  77. cfg.connect((leader + 2) % SERVERS);
  78. cfg.connect((leader + 3) % SERVERS);
  79. // the disconnected majority may have chosen a leader from
  80. // among their own ranks, forgetting index 2.
  81. let leader2 = cfg.check_one_leader()?;
  82. let result = cfg.leader_start(leader2, 30);
  83. assert!(result.is_some(), "leader2 rejected start()");
  84. let index = result.unwrap().1;
  85. assert!(index == 2 || index == 3, "unexpected index {}", index);
  86. cfg.one(1000, SERVERS, true)?;
  87. cfg.end();
  88. Ok(())
  89. }
  90. #[test]
  91. fn rejoin() -> config::Result<()> {
  92. const SERVERS: usize = 3;
  93. let cfg = config::make_config(SERVERS, false);
  94. let _guard = cfg.deferred_cleanup();
  95. cfg.begin("Test (2B): rejoin of partitioned leader");
  96. cfg.one(101, SERVERS, true)?;
  97. // leader network failure
  98. let leader1 = cfg.check_one_leader()?;
  99. cfg.disconnect(leader1);
  100. // make old leader try to agree on some entries
  101. cfg.leader_start(leader1, 102);
  102. cfg.leader_start(leader1, 103);
  103. cfg.leader_start(leader1, 104);
  104. // new leader commits, also for index=2
  105. cfg.one(103, 2, true)?;
  106. // new leader network failure
  107. let leader2 = cfg.check_one_leader()?;
  108. cfg.disconnect(leader2);
  109. // old leader connected again
  110. cfg.connect(leader1);
  111. cfg.one(104, 2, true)?;
  112. // all together now
  113. cfg.connect(leader2);
  114. cfg.one(105, SERVERS, true)?;
  115. cfg.end();
  116. Ok(())
  117. }
  118. #[test]
  119. fn backup() -> config::Result<()> {
  120. const SERVERS: usize = 5;
  121. let cfg = config::make_config(SERVERS, false);
  122. let _guard = cfg.deferred_cleanup();
  123. cfg.begin(
  124. "Test (2B): leader backs up quickly over incorrect follower logs",
  125. );
  126. cfg.one(thread_rng().gen(), SERVERS, true)?;
  127. // put leader and one follower in a partition
  128. let leader1 = cfg.check_one_leader()?;
  129. cfg.disconnect((leader1 + 2) % SERVERS);
  130. cfg.disconnect((leader1 + 3) % SERVERS);
  131. cfg.disconnect((leader1 + 4) % SERVERS);
  132. // submit lots of commands that won't commit
  133. for _ in 0..SERVERS {
  134. cfg.leader_start(leader1, thread_rng().gen());
  135. }
  136. config::sleep_election_timeouts(2);
  137. cfg.disconnect((leader1 + 0) % SERVERS);
  138. cfg.disconnect((leader1 + 1) % SERVERS);
  139. // allow other partition to recover
  140. cfg.connect((leader1 + 2) % SERVERS);
  141. cfg.connect((leader1 + 3) % SERVERS);
  142. cfg.connect((leader1 + 4) % SERVERS);
  143. // lots of successful commands to new group.
  144. for _ in 0..50 {
  145. cfg.one(thread_rng().gen(), 3, true)?;
  146. }
  147. // now another partitioned leader and one follower
  148. let leader2 = cfg.check_one_leader()?;
  149. let mut other = (leader1 + 2) % SERVERS;
  150. if leader2 == other {
  151. other = (leader2 + 1) % SERVERS;
  152. }
  153. cfg.disconnect(other);
  154. // lots more commands that won't commit
  155. for _ in 0..50 {
  156. cfg.leader_start(leader2, thread_rng().gen());
  157. }
  158. config::sleep_election_timeouts(2);
  159. // bring original leader back to life,
  160. for i in 0..SERVERS {
  161. cfg.disconnect(i);
  162. }
  163. cfg.connect((leader1 + 0) % SERVERS);
  164. cfg.connect((leader1 + 1) % SERVERS);
  165. cfg.connect(other);
  166. // lots of successful commands to new group.
  167. for _ in 0..50 {
  168. cfg.one(thread_rng().gen(), 3, true)?;
  169. }
  170. // now everyone
  171. for i in 0..SERVERS {
  172. cfg.connect(i);
  173. }
  174. cfg.one(thread_rng().gen(), SERVERS, true)?;
  175. cfg.end();
  176. Ok(())
  177. }
  178. #[test]
  179. fn count() -> config::Result<()> {
  180. const SERVERS: usize = 3;
  181. let cfg = config::make_config(SERVERS, false);
  182. let _guard = cfg.deferred_cleanup();
  183. cfg.begin("Test (2B): RPC counts aren't too high");
  184. cfg.check_one_leader()?;
  185. let total = cfg.total_rpcs();
  186. assert!(
  187. total >= 1 && total <= 30,
  188. "too many or few RPCs ({}) to elect initial leader",
  189. total
  190. );
  191. let mut retries = 0;
  192. let (success, total) = loop {
  193. if retries == 5 {
  194. break (false, 0);
  195. }
  196. if retries != 0 {
  197. config::sleep_millis(3000);
  198. }
  199. retries += 1;
  200. let leader = cfg.check_one_leader()?;
  201. let start_total = cfg.total_rpcs();
  202. const ITERS: usize = 10;
  203. let (term, start_index) = match cfg.leader_start(leader, 1) {
  204. Some(pair) => pair,
  205. None => continue,
  206. };
  207. let mut cmds = vec![];
  208. for i in 1..(ITERS + 2) {
  209. let cmd: i32 = thread_rng().gen();
  210. cmds.push(cmd);
  211. let index = match cfg.leader_start(leader, cmd) {
  212. Some((new_term, index)) => {
  213. if new_term == term {
  214. Some(index)
  215. } else {
  216. None
  217. }
  218. }
  219. None => None,
  220. };
  221. if let Some(index) = index {
  222. assert_eq!(start_index + i, index, "start() failed");
  223. } else {
  224. retries = 0;
  225. break;
  226. }
  227. }
  228. if retries == 0 {
  229. continue;
  230. }
  231. for i in 1..(ITERS + 1) {
  232. let cmd = cfg.wait(start_index + i, SERVERS, Some(term))?;
  233. if let Some(cmd) = cmd {
  234. assert_eq!(
  235. cmd,
  236. cmds[i - 1],
  237. "wrong value {} committed for index {}; expected {:?}",
  238. cmd,
  239. start_index + i,
  240. cmds
  241. )
  242. } else {
  243. retries = 0;
  244. break;
  245. }
  246. }
  247. if term != cfg.check_terms()?.expect("terms should be agreed on") {
  248. retries = 0;
  249. }
  250. if retries == 0 {
  251. continue;
  252. }
  253. let diff = cfg.total_rpcs() - start_total;
  254. if diff > (ITERS + 1 + 3) * 3 {
  255. panic!("too many RPCs ({}) for {} entries", diff, ITERS);
  256. }
  257. break (true, cfg.total_rpcs());
  258. };
  259. assert!(success, "term change too often");
  260. config::sleep_election_timeouts(1);
  261. let diff = cfg.total_rpcs() - total;
  262. assert!(diff < 3 * 20, "too many RPCs ({}) for 1 second of idleness");
  263. cfg.end();
  264. Ok(())
  265. }