// agreement_tests.rs — integration tests for Raft log agreement (lab 2B).
#[macro_use]
extern crate anyhow;
extern crate bytes;
extern crate labrpc;
extern crate ruaft;
#[macro_use]
extern crate scopeguard;

use rand::{thread_rng, Rng};

mod config;
  10. #[test]
  11. fn basic_agreement() -> config::Result<()> {
  12. const SERVERS: usize = 5;
  13. let cfg = config::make_config(SERVERS, false);
  14. defer!(cfg.cleanup());
  15. cfg.begin("Test (2B): basic agreement");
  16. for index in 1..4 {
  17. let committed = cfg.committed_count(index)?;
  18. assert_eq!(0, committed.0, "some have committed before start()");
  19. let commit_index = cfg.one(index as i32 * 100, SERVERS, false)?;
  20. assert_eq!(
  21. index, commit_index,
  22. "got index {} but expected {}",
  23. commit_index, index
  24. );
  25. }
  26. cfg.end();
  27. Ok(())
  28. }
  29. #[test]
  30. fn fail_agree() -> config::Result<()> {
  31. const SERVERS: usize = 3;
  32. let cfg = config::make_config(SERVERS, false);
  33. defer!(cfg.cleanup());
  34. cfg.begin("Test (2B): agreement despite follower disconnection");
  35. cfg.one(101, SERVERS, false)?;
  36. // follower network disconnection
  37. let leader = cfg.check_one_leader()?;
  38. cfg.disconnect((leader + 1) % SERVERS);
  39. // agree despite one disconnected server?
  40. cfg.one(102, SERVERS - 1, false)?;
  41. cfg.one(103, SERVERS - 1, false)?;
  42. config::sleep_election_timeouts(1);
  43. cfg.one(104, SERVERS - 1, false)?;
  44. cfg.one(105, SERVERS - 1, false)?;
  45. // re-connect
  46. cfg.connect((leader + 1) % SERVERS);
  47. // agree with full set of servers?
  48. cfg.one(106, SERVERS, true)?;
  49. config::sleep_election_timeouts(1);
  50. cfg.one(107, SERVERS, true)?;
  51. cfg.end();
  52. Ok(())
  53. }
  54. #[test]
  55. fn fail_no_agree() -> config::Result<()> {
  56. const SERVERS: usize = 5;
  57. let cfg = config::make_config(SERVERS, false);
  58. defer!(cfg.cleanup());
  59. cfg.begin("Test (2B): no agreement if too many followers disconnect");
  60. cfg.one(10, SERVERS, false)?;
  61. // 3 of 5 followers disconnect
  62. let leader = cfg.check_one_leader()?;
  63. cfg.disconnect((leader + 1) % SERVERS);
  64. cfg.disconnect((leader + 2) % SERVERS);
  65. cfg.disconnect((leader + 3) % SERVERS);
  66. let result = cfg.leader_start(leader, 20);
  67. assert!(result.is_some(), "leader rejected start()");
  68. let index = result.unwrap().1;
  69. assert_eq!(2, index, "expected index 2, got {}", index);
  70. config::sleep_election_timeouts(2);
  71. let (commit_count, _) = cfg.committed_count(index)?;
  72. assert_eq!(
  73. 0, commit_count,
  74. "{} committed but no majority",
  75. commit_count
  76. );
  77. // repair
  78. cfg.connect((leader + 1) % SERVERS);
  79. cfg.connect((leader + 2) % SERVERS);
  80. cfg.connect((leader + 3) % SERVERS);
  81. // the disconnected majority may have chosen a leader from
  82. // among their own ranks, forgetting index 2.
  83. let leader2 = cfg.check_one_leader()?;
  84. let result = cfg.leader_start(leader2, 30);
  85. assert!(result.is_some(), "leader2 rejected start()");
  86. let index = result.unwrap().1;
  87. assert!(index == 2 || index == 3, "unexpected index {}", index);
  88. cfg.one(1000, SERVERS, true)?;
  89. cfg.end();
  90. Ok(())
  91. }
  92. #[test]
  93. fn rejoin() -> config::Result<()> {
  94. const SERVERS: usize = 3;
  95. let cfg = config::make_config(SERVERS, false);
  96. defer!(cfg.cleanup());
  97. cfg.begin("Test (2B): rejoin of partitioned leader");
  98. cfg.one(101, SERVERS, true)?;
  99. // leader network failure
  100. let leader1 = cfg.check_one_leader()?;
  101. cfg.disconnect(leader1);
  102. // make old leader try to agree on some entries
  103. cfg.leader_start(leader1, 102);
  104. cfg.leader_start(leader1, 103);
  105. cfg.leader_start(leader1, 104);
  106. // new leader commits, also for index=2
  107. cfg.one(103, 2, true)?;
  108. // new leader network failure
  109. let leader2 = cfg.check_one_leader()?;
  110. cfg.disconnect(leader2);
  111. // old leader connected again
  112. cfg.connect(leader1);
  113. cfg.one(104, 2, true)?;
  114. // all together now
  115. cfg.connect(leader2);
  116. cfg.one(105, SERVERS, true)?;
  117. cfg.end();
  118. Ok(())
  119. }
  120. #[test]
  121. fn backup() -> config::Result<()> {
  122. const SERVERS: usize = 5;
  123. let cfg = config::make_config(SERVERS, false);
  124. defer!(cfg.cleanup());
  125. cfg.begin(
  126. "Test (2B): leader backs up quickly over incorrect follower logs",
  127. );
  128. cfg.one(thread_rng().gen(), SERVERS, true)?;
  129. // put leader and one follower in a partition
  130. let leader1 = cfg.check_one_leader()?;
  131. cfg.disconnect((leader1 + 2) % SERVERS);
  132. cfg.disconnect((leader1 + 3) % SERVERS);
  133. cfg.disconnect((leader1 + 4) % SERVERS);
  134. // submit lots of commands that won't commit
  135. for _ in 0..SERVERS {
  136. cfg.leader_start(leader1, thread_rng().gen());
  137. }
  138. config::sleep_election_timeouts(2);
  139. cfg.disconnect((leader1 + 0) % SERVERS);
  140. cfg.disconnect((leader1 + 1) % SERVERS);
  141. // allow other partition to recover
  142. cfg.connect((leader1 + 2) % SERVERS);
  143. cfg.connect((leader1 + 3) % SERVERS);
  144. cfg.connect((leader1 + 4) % SERVERS);
  145. // lots of successful commands to new group.
  146. for _ in 0..50 {
  147. cfg.one(thread_rng().gen(), 3, true)?;
  148. }
  149. // now another partitioned leader and one follower
  150. let leader2 = cfg.check_one_leader()?;
  151. let mut other = (leader1 + 2) % SERVERS;
  152. if leader2 == other {
  153. other = (leader2 + 1) % SERVERS;
  154. }
  155. cfg.disconnect(other);
  156. // lots more commands that won't commit
  157. for _ in 0..50 {
  158. cfg.leader_start(leader2, thread_rng().gen());
  159. }
  160. config::sleep_election_timeouts(2);
  161. // bring original leader back to life,
  162. for i in 0..SERVERS {
  163. cfg.disconnect(i);
  164. }
  165. cfg.connect((leader1 + 0) % SERVERS);
  166. cfg.connect((leader1 + 1) % SERVERS);
  167. cfg.connect(other);
  168. // lots of successful commands to new group.
  169. for _ in 0..50 {
  170. cfg.one(thread_rng().gen(), 3, true)?;
  171. }
  172. // now everyone
  173. for i in 0..SERVERS {
  174. cfg.connect(i);
  175. }
  176. cfg.one(thread_rng().gen(), SERVERS, true)?;
  177. cfg.end();
  178. Ok(())
  179. }
  180. #[test]
  181. fn count() -> config::Result<()> {
  182. const SERVERS: usize = 3;
  183. let cfg = config::make_config(SERVERS, false);
  184. defer!(cfg.cleanup());
  185. cfg.begin("Test (2B): RPC counts aren't too high");
  186. cfg.check_one_leader()?;
  187. let total = cfg.total_rpcs();
  188. assert!(
  189. total >= 1 && total <= 30,
  190. "too many or few RPCs ({}) to elect initial leader",
  191. total
  192. );
  193. let mut retries = 0;
  194. let (success, total) = loop {
  195. if retries == 5 {
  196. break (false, 0);
  197. }
  198. if retries != 0 {
  199. config::sleep_millis(3000);
  200. }
  201. retries += 1;
  202. let leader = cfg.check_one_leader()?;
  203. let start_total = cfg.total_rpcs();
  204. const ITERS: usize = 10;
  205. let (term, start_index) = match cfg.leader_start(leader, 1) {
  206. Some(pair) => pair,
  207. None => continue,
  208. };
  209. let mut cmds = vec![];
  210. for i in 1..(ITERS + 2) {
  211. let cmd: i32 = thread_rng().gen();
  212. cmds.push(cmd);
  213. let index = match cfg.leader_start(leader, cmd) {
  214. Some((new_term, index)) => {
  215. if new_term == term {
  216. Some(index)
  217. } else {
  218. None
  219. }
  220. }
  221. None => None,
  222. };
  223. if let Some(index) = index {
  224. assert_eq!(start_index + i, index, "start() failed");
  225. } else {
  226. retries = 0;
  227. break;
  228. }
  229. }
  230. if retries == 0 {
  231. continue;
  232. }
  233. for i in 1..(ITERS + 1) {
  234. let cmd = cfg.wait(start_index + i, SERVERS, Some(term))?;
  235. if let Some(cmd) = cmd {
  236. assert_eq!(
  237. cmd,
  238. cmds[i - 1],
  239. "wrong value {} committed for index {}; expected {:?}",
  240. cmd,
  241. start_index + i,
  242. cmds
  243. )
  244. } else {
  245. retries = 0;
  246. break;
  247. }
  248. }
  249. if term != cfg.check_terms()?.expect("terms should be agreed on") {
  250. retries = 0;
  251. }
  252. if retries == 0 {
  253. continue;
  254. }
  255. let diff = cfg.total_rpcs() - start_total;
  256. if diff > (ITERS + 1 + 3) * 3 {
  257. panic!("too many RPCs ({}) for {} entries", diff, ITERS);
  258. }
  259. break (true, cfg.total_rpcs());
  260. };
  261. assert!(success, "term change too often");
  262. config::sleep_election_timeouts(1);
  263. let diff = cfg.total_rpcs() - total;
  264. assert!(diff < 3 * 20, "too many RPCs ({}) for 1 second of idleness");
  265. cfg.end();
  266. Ok(())
  267. }