//! Raft persistence and churn tests (lab 2C): crash/restart, Figure 8,
//! unreliable agreement, and concurrent-client churn.
  1. #![allow(clippy::identity_op)]
  2. use std::sync::atomic::AtomicBool;
  3. use std::sync::atomic::Ordering;
  4. use std::sync::Arc;
  5. use rand::{thread_rng, Rng};
  6. use scopeguard::defer;
  7. use test_configs::{make_config, raft::config};
  8. #[test]
  9. fn persist1() -> config::Result<()> {
  10. const SERVERS: usize = 5;
  11. let cfg = make_config!(SERVERS, false);
  12. defer!(cfg.cleanup());
  13. cfg.begin("Test (2C): basic persistence");
  14. cfg.one(11, SERVERS, true)?;
  15. // crash and re-start all
  16. for i in 0..SERVERS {
  17. cfg.start1(i)?;
  18. }
  19. for i in 0..SERVERS {
  20. cfg.disconnect(i);
  21. cfg.connect(i);
  22. }
  23. cfg.one(12, SERVERS, true)?;
  24. let leader1 = cfg.check_one_leader()?;
  25. cfg.disconnect(leader1);
  26. cfg.start1(leader1)?;
  27. cfg.connect(leader1);
  28. cfg.one(13, SERVERS, true)?;
  29. let leader2 = cfg.check_one_leader()?;
  30. cfg.disconnect(leader2);
  31. cfg.one(14, SERVERS - 1, true)?;
  32. cfg.start1(leader2)?;
  33. cfg.connect(leader2);
  34. // wait for leader2 to join before killing i3
  35. cfg.wait(4, SERVERS, None)?;
  36. let i3 = (cfg.check_one_leader()? + 1) % SERVERS;
  37. cfg.disconnect(i3);
  38. cfg.one(15, SERVERS - 1, true)?;
  39. cfg.start1(i3)?;
  40. cfg.connect(i3);
  41. cfg.one(16, SERVERS, true)?;
  42. cfg.end();
  43. Ok(())
  44. }
  45. #[test]
  46. fn persist2() -> config::Result<()> {
  47. const SERVERS: usize = 5;
  48. let cfg = make_config!(SERVERS, false);
  49. defer!(cfg.cleanup());
  50. cfg.begin("Test (2C): more persistence");
  51. let mut index = 1;
  52. for _ in 0..5 {
  53. cfg.one(10 + index, SERVERS, true)?;
  54. index += 1;
  55. let leader1 = cfg.check_one_leader()?;
  56. cfg.disconnect((leader1 + 1) % SERVERS);
  57. cfg.disconnect((leader1 + 2) % SERVERS);
  58. cfg.one(10 + index, SERVERS - 2, true)?;
  59. index += 1;
  60. cfg.disconnect((leader1 + 0) % SERVERS);
  61. cfg.disconnect((leader1 + 3) % SERVERS);
  62. cfg.disconnect((leader1 + 4) % SERVERS);
  63. cfg.start1((leader1 + 1) % SERVERS)?;
  64. cfg.start1((leader1 + 2) % SERVERS)?;
  65. cfg.connect((leader1 + 1) % SERVERS);
  66. cfg.connect((leader1 + 2) % SERVERS);
  67. config::sleep_election_timeouts(1);
  68. cfg.start1((leader1 + 3) % SERVERS)?;
  69. cfg.connect((leader1 + 3) % SERVERS);
  70. cfg.one(10 + index, SERVERS - 2, true)?;
  71. index += 1;
  72. cfg.connect((leader1 + 4) % SERVERS);
  73. cfg.connect((leader1 + 0) % SERVERS);
  74. }
  75. cfg.one(1000, SERVERS, true)?;
  76. cfg.end();
  77. Ok(())
  78. }
  79. #[test]
  80. fn persist3() -> config::Result<()> {
  81. const SERVERS: usize = 3;
  82. let cfg = make_config!(SERVERS, false);
  83. defer!(cfg.cleanup());
  84. cfg.begin(
  85. "Test (2C): partitioned leader and one follower crash, leader restarts",
  86. );
  87. cfg.one(101, 3, true)?;
  88. let leader = cfg.check_one_leader()?;
  89. cfg.disconnect((leader + 2) % SERVERS);
  90. cfg.one(102, 2, true)?;
  91. cfg.crash1((leader + 0) % SERVERS);
  92. cfg.crash1((leader + 1) % SERVERS);
  93. cfg.connect((leader + 2) % SERVERS);
  94. cfg.start1((leader + 0) % SERVERS)?;
  95. cfg.connect((leader + 0) % SERVERS);
  96. cfg.one(103, 2, true)?;
  97. cfg.start1((leader + 1) % SERVERS)?;
  98. cfg.connect((leader + 1) % SERVERS);
  99. cfg.one(104, SERVERS, true)?;
  100. cfg.end();
  101. Ok(())
  102. }
  103. #[test]
  104. fn figure8() -> config::Result<()> {
  105. const SERVERS: usize = 5;
  106. let cfg = make_config!(SERVERS, false);
  107. defer!(cfg.cleanup());
  108. cfg.begin("Test (2C): Figure 8");
  109. cfg.one(thread_rng().gen(), 1, true)?;
  110. let mut nup = SERVERS;
  111. for _ in 0..1000 {
  112. let mut leader = None;
  113. for i in 0..SERVERS {
  114. if cfg.is_server_alive(i)
  115. && cfg.leader_start(i, thread_rng().gen()).is_some()
  116. {
  117. leader = Some(i);
  118. }
  119. }
  120. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  121. config::LONG_ELECTION_TIMEOUT_MILLIS >> 1
  122. } else {
  123. // Magic number 13?
  124. 13
  125. };
  126. let millis = thread_rng().gen_range(0..millis_upper);
  127. config::sleep_millis(millis);
  128. if let Some(leader) = leader {
  129. cfg.crash1(leader);
  130. nup -= 1;
  131. }
  132. if nup < 3 {
  133. let index = thread_rng().gen_range(0..SERVERS);
  134. if !cfg.is_server_alive(index) {
  135. cfg.start1(index)?;
  136. cfg.connect(index);
  137. nup += 1
  138. }
  139. }
  140. }
  141. for index in 0..SERVERS {
  142. if !cfg.is_server_alive(index) {
  143. cfg.start1(index)?;
  144. cfg.connect(index);
  145. }
  146. }
  147. cfg.one(thread_rng().gen(), SERVERS, true)?;
  148. cfg.end();
  149. Ok(())
  150. }
  151. #[test]
  152. fn unreliable_agree() -> config::Result<()> {
  153. const SERVERS: usize = 5;
  154. let cfg = Arc::new(make_config!(SERVERS, true));
  155. defer!(cfg.cleanup());
  156. cfg.begin("Test (2C): unreliable agreement");
  157. let mut handles = vec![];
  158. for iters in 1..50 {
  159. for j in 0..4 {
  160. let cfg = cfg.clone();
  161. let logger = test_utils::thread_local_logger::get();
  162. let handle = std::thread::spawn(move || {
  163. test_utils::thread_local_logger::set(logger);
  164. cfg.one(100 * iters + j, 1, true)
  165. });
  166. handles.push(handle);
  167. }
  168. cfg.one(iters, 1, true)?;
  169. }
  170. cfg.set_unreliable(false);
  171. for handle in handles {
  172. handle.join().expect("Thread join should not fail")?;
  173. }
  174. cfg.one(100, SERVERS, true)?;
  175. cfg.end();
  176. Ok(())
  177. }
  178. #[test]
  179. fn figure8_unreliable() -> config::Result<()> {
  180. const SERVERS: usize = 5;
  181. let cfg = make_config!(SERVERS, false);
  182. defer!(cfg.cleanup());
  183. cfg.begin("Test (2C): Figure 8 (unreliable)");
  184. cfg.one(thread_rng().gen_range(0..10000), 1, true)?;
  185. let mut nup = SERVERS;
  186. for iters in 0..1000 {
  187. if iters == 200 {
  188. cfg.set_long_reordering(true);
  189. }
  190. let mut leader = None;
  191. for i in 0..SERVERS {
  192. if cfg.is_server_alive(i)
  193. && cfg.leader_start(i, thread_rng().gen()).is_some()
  194. && cfg.is_connected(i)
  195. {
  196. leader = Some(i);
  197. }
  198. }
  199. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  200. config::LONG_ELECTION_TIMEOUT_MILLIS >> 1
  201. } else {
  202. // Magic number 13?
  203. 13
  204. };
  205. let millis = thread_rng().gen_range(0..millis_upper);
  206. config::sleep_millis(millis);
  207. if let Some(leader) = leader {
  208. if thread_rng().gen_ratio(1, 2) {
  209. cfg.disconnect(leader);
  210. nup -= 1;
  211. }
  212. }
  213. if nup < 3 {
  214. let index = thread_rng().gen_range(0..SERVERS);
  215. if !cfg.is_connected(index) {
  216. cfg.connect(index);
  217. nup += 1
  218. }
  219. }
  220. }
  221. for index in 0..SERVERS {
  222. if !cfg.is_connected(index) {
  223. cfg.connect(index);
  224. }
  225. }
  226. cfg.one(thread_rng().gen_range(0..10000), SERVERS, true)?;
  227. cfg.end();
  228. Ok(())
  229. }
  230. fn internal_churn(unreliable: bool) -> config::Result<()> {
  231. const SERVERS: usize = 5;
  232. let cfg = Arc::new(make_config!(SERVERS, false));
  233. defer!(cfg.cleanup());
  234. if unreliable {
  235. cfg.begin("Test (2C): unreliable churn");
  236. } else {
  237. cfg.begin("Test (2C): churn");
  238. }
  239. let stop = Arc::new(AtomicBool::new(false));
  240. let mut handles = vec![];
  241. for client_index in 0..3 {
  242. let stop = stop.clone();
  243. let cfg = cfg.clone();
  244. let logger = test_utils::thread_local_logger::get();
  245. let handle = std::thread::spawn(move || {
  246. test_utils::thread_local_logger::set(logger);
  247. let mut cmds = vec![];
  248. while !stop.load(Ordering::SeqCst) {
  249. let cmd = thread_rng().gen();
  250. let mut index = None;
  251. for i in 0..SERVERS {
  252. if cfg.is_server_alive(i) {
  253. let start = cfg.leader_start(i, cmd);
  254. if start.is_some() {
  255. index = Some(i);
  256. }
  257. }
  258. }
  259. if let Some(index) = index {
  260. for millis in [10, 20, 50, 100, 200].iter() {
  261. let (cmd_index, cmd_committed) =
  262. // somehow the compiler cannot infer the error type.
  263. match cfg.committed_count(index) {
  264. Ok(t) => t,
  265. Err(e) => return Err(e),
  266. };
  267. #[allow(clippy::collapsible_if)]
  268. if cmd_index > 0 {
  269. if cmd_committed == cmd {
  270. cmds.push(cmd);
  271. }
  272. // The contract we started might not get
  273. }
  274. config::sleep_millis(*millis);
  275. }
  276. } else {
  277. config::sleep_millis(79 + client_index * 17);
  278. }
  279. }
  280. Ok(cmds)
  281. });
  282. handles.push(handle);
  283. }
  284. for _ in 0..20 {
  285. if thread_rng().gen_ratio(200, 1000) {
  286. cfg.disconnect(thread_rng().gen_range(0..SERVERS));
  287. }
  288. if thread_rng().gen_ratio(500, 1000) {
  289. let server = thread_rng().gen_range(0..SERVERS);
  290. if !cfg.is_server_alive(server) {
  291. cfg.start1(server)?;
  292. }
  293. cfg.connect(server);
  294. }
  295. if thread_rng().gen_ratio(200, 1000) {
  296. let server = thread_rng().gen_range(0..SERVERS);
  297. if cfg.is_server_alive(server) {
  298. cfg.crash1(server);
  299. }
  300. }
  301. config::sleep_millis(config::LONG_ELECTION_TIMEOUT_MILLIS / 10 * 7);
  302. }
  303. config::sleep_election_timeouts(1);
  304. cfg.set_unreliable(false);
  305. for i in 0..SERVERS {
  306. if !cfg.is_server_alive(i) {
  307. cfg.start1(i)?;
  308. }
  309. cfg.connect(i);
  310. }
  311. stop.store(true, Ordering::SeqCst);
  312. let mut all_cmds = vec![];
  313. for handle in handles {
  314. let mut cmds = handle.join().expect("Client should not fail")?;
  315. all_cmds.append(&mut cmds);
  316. }
  317. config::sleep_election_timeouts(1);
  318. let last_cmd_index = cfg.one(thread_rng().gen(), SERVERS, true)?;
  319. let mut consented = vec![];
  320. for cmd_index in 1..last_cmd_index + 1 {
  321. let cmd = cfg.wait(cmd_index, SERVERS, None)?;
  322. let cmd = cmd.expect("There should always be a command");
  323. consented.push(cmd);
  324. }
  325. for cmd in all_cmds {
  326. assert!(
  327. consented.contains(&cmd),
  328. "Cmd {} not found in {:?}",
  329. cmd,
  330. consented
  331. );
  332. }
  333. cfg.end();
  334. Ok(())
  335. }
#[test]
// Churn test over a reliable network.
fn reliable_churn() -> config::Result<()> {
    internal_churn(false)
}
#[test]
// Churn test over an unreliable network (dropped/delayed messages).
fn unreliable_churn() -> config::Result<()> {
    internal_churn(true)
}