// persist_tests.rs — Raft persistence ("Test 2C") integration tests.
  1. #![allow(clippy::identity_op)]
  2. use std::sync::atomic::AtomicBool;
  3. use std::sync::atomic::Ordering;
  4. use std::sync::Arc;
  5. use rand::{thread_rng, Rng};
  6. use scopeguard::defer;
  7. // This is to remove the annoying "unused code in config" warnings.
  8. pub mod config;
  9. #[test]
  10. fn persist1() -> config::Result<()> {
  11. const SERVERS: usize = 5;
  12. let cfg = make_config!(SERVERS, false);
  13. defer!(cfg.cleanup());
  14. cfg.begin("Test (2C): basic persistence");
  15. cfg.one(11, SERVERS, true)?;
  16. // crash and re-start all
  17. for i in 0..SERVERS {
  18. cfg.start1(i)?;
  19. }
  20. for i in 0..SERVERS {
  21. cfg.disconnect(i);
  22. cfg.connect(i);
  23. }
  24. cfg.one(12, SERVERS, true)?;
  25. let leader1 = cfg.check_one_leader()?;
  26. cfg.disconnect(leader1);
  27. cfg.start1(leader1)?;
  28. cfg.connect(leader1);
  29. cfg.one(13, SERVERS, true)?;
  30. let leader2 = cfg.check_one_leader()?;
  31. cfg.disconnect(leader2);
  32. cfg.one(14, SERVERS - 1, true)?;
  33. cfg.start1(leader2)?;
  34. cfg.connect(leader2);
  35. // wait for leader2 to join before killing i3
  36. cfg.wait(4, SERVERS, None)?;
  37. let i3 = (cfg.check_one_leader()? + 1) % SERVERS;
  38. cfg.disconnect(i3);
  39. cfg.one(15, SERVERS - 1, true)?;
  40. cfg.start1(i3)?;
  41. cfg.connect(i3);
  42. cfg.one(16, SERVERS, true)?;
  43. cfg.end();
  44. Ok(())
  45. }
  46. #[test]
  47. fn persist2() -> config::Result<()> {
  48. const SERVERS: usize = 5;
  49. let cfg = make_config!(SERVERS, false);
  50. defer!(cfg.cleanup());
  51. cfg.begin("Test (2C): more persistence");
  52. let mut index = 1;
  53. for _ in 0..5 {
  54. cfg.one(10 + index, SERVERS, true)?;
  55. index += 1;
  56. let leader1 = cfg.check_one_leader()?;
  57. cfg.disconnect((leader1 + 1) % SERVERS);
  58. cfg.disconnect((leader1 + 2) % SERVERS);
  59. cfg.one(10 + index, SERVERS - 2, true)?;
  60. index += 1;
  61. cfg.disconnect((leader1 + 0) % SERVERS);
  62. cfg.disconnect((leader1 + 3) % SERVERS);
  63. cfg.disconnect((leader1 + 4) % SERVERS);
  64. cfg.start1((leader1 + 1) % SERVERS)?;
  65. cfg.start1((leader1 + 2) % SERVERS)?;
  66. cfg.connect((leader1 + 1) % SERVERS);
  67. cfg.connect((leader1 + 2) % SERVERS);
  68. config::sleep_election_timeouts(1);
  69. cfg.start1((leader1 + 3) % SERVERS)?;
  70. cfg.connect((leader1 + 3) % SERVERS);
  71. cfg.one(10 + index, SERVERS - 2, true)?;
  72. index += 1;
  73. cfg.connect((leader1 + 4) % SERVERS);
  74. cfg.connect((leader1 + 0) % SERVERS);
  75. }
  76. cfg.one(1000, SERVERS, true)?;
  77. cfg.end();
  78. Ok(())
  79. }
  80. #[test]
  81. fn persist3() -> config::Result<()> {
  82. const SERVERS: usize = 3;
  83. let cfg = make_config!(SERVERS, false);
  84. defer!(cfg.cleanup());
  85. cfg.begin(
  86. "Test (2C): partitioned leader and one follower crash, leader restarts",
  87. );
  88. cfg.one(101, 3, true)?;
  89. let leader = cfg.check_one_leader()?;
  90. cfg.disconnect((leader + 2) % SERVERS);
  91. cfg.one(102, 2, true)?;
  92. cfg.crash1((leader + 0) % SERVERS);
  93. cfg.crash1((leader + 1) % SERVERS);
  94. cfg.connect((leader + 2) % SERVERS);
  95. cfg.start1((leader + 0) % SERVERS)?;
  96. cfg.connect((leader + 0) % SERVERS);
  97. cfg.one(103, 2, true)?;
  98. cfg.start1((leader + 1) % SERVERS)?;
  99. cfg.connect((leader + 1) % SERVERS);
  100. cfg.one(104, SERVERS, true)?;
  101. cfg.end();
  102. Ok(())
  103. }
  104. #[test]
  105. fn figure8() -> config::Result<()> {
  106. const SERVERS: usize = 5;
  107. let cfg = make_config!(SERVERS, false);
  108. defer!(cfg.cleanup());
  109. cfg.begin("Test (2C): Figure 8");
  110. cfg.one(thread_rng().gen(), 1, true)?;
  111. let mut nup = SERVERS;
  112. for _ in 0..1000 {
  113. let mut leader = None;
  114. for i in 0..SERVERS {
  115. if cfg.is_server_alive(i)
  116. && cfg.leader_start(i, thread_rng().gen()).is_some()
  117. {
  118. leader = Some(i);
  119. }
  120. }
  121. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  122. config::LONG_ELECTION_TIMEOUT_MILLIS >> 1
  123. } else {
  124. // Magic number 13?
  125. 13
  126. };
  127. let millis = thread_rng().gen_range(0..millis_upper);
  128. config::sleep_millis(millis);
  129. if let Some(leader) = leader {
  130. cfg.crash1(leader);
  131. nup -= 1;
  132. }
  133. if nup < 3 {
  134. let index = thread_rng().gen_range(0..SERVERS);
  135. if !cfg.is_server_alive(index) {
  136. cfg.start1(index)?;
  137. cfg.connect(index);
  138. nup += 1
  139. }
  140. }
  141. }
  142. for index in 0..SERVERS {
  143. if !cfg.is_server_alive(index) {
  144. cfg.start1(index)?;
  145. cfg.connect(index);
  146. }
  147. }
  148. cfg.one(thread_rng().gen(), SERVERS, true)?;
  149. cfg.end();
  150. Ok(())
  151. }
  152. #[test]
  153. fn unreliable_agree() -> config::Result<()> {
  154. const SERVERS: usize = 5;
  155. let cfg = Arc::new(make_config!(SERVERS, true));
  156. defer!(cfg.cleanup());
  157. cfg.begin("Test (2C): unreliable agreement");
  158. let mut handles = vec![];
  159. for iters in 1..50 {
  160. for j in 0..4 {
  161. let cfg = cfg.clone();
  162. let logger = test_utils::thread_local_logger::get();
  163. let handle = std::thread::spawn(move || {
  164. test_utils::thread_local_logger::set(logger);
  165. cfg.one(100 * iters + j, 1, true)
  166. });
  167. handles.push(handle);
  168. }
  169. cfg.one(iters, 1, true)?;
  170. }
  171. cfg.set_unreliable(false);
  172. for handle in handles {
  173. handle.join().expect("Thread join should not fail")?;
  174. }
  175. cfg.one(100, SERVERS, true)?;
  176. cfg.end();
  177. Ok(())
  178. }
  179. #[test]
  180. fn figure8_unreliable() -> config::Result<()> {
  181. const SERVERS: usize = 5;
  182. let cfg = make_config!(SERVERS, false);
  183. defer!(cfg.cleanup());
  184. cfg.begin("Test (2C): Figure 8 (unreliable)");
  185. cfg.one(thread_rng().gen_range(0..10000), 1, true)?;
  186. let mut nup = SERVERS;
  187. for iters in 0..1000 {
  188. if iters == 200 {
  189. cfg.set_long_reordering(true);
  190. }
  191. let mut leader = None;
  192. for i in 0..SERVERS {
  193. if cfg.is_server_alive(i)
  194. && cfg.leader_start(i, thread_rng().gen()).is_some()
  195. && cfg.is_connected(i)
  196. {
  197. leader = Some(i);
  198. }
  199. }
  200. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  201. config::LONG_ELECTION_TIMEOUT_MILLIS >> 1
  202. } else {
  203. // Magic number 13?
  204. 13
  205. };
  206. let millis = thread_rng().gen_range(0..millis_upper);
  207. config::sleep_millis(millis);
  208. if let Some(leader) = leader {
  209. if thread_rng().gen_ratio(1, 2) {
  210. cfg.disconnect(leader);
  211. nup -= 1;
  212. }
  213. }
  214. if nup < 3 {
  215. let index = thread_rng().gen_range(0..SERVERS);
  216. if !cfg.is_connected(index) {
  217. cfg.connect(index);
  218. nup += 1
  219. }
  220. }
  221. }
  222. for index in 0..SERVERS {
  223. if !cfg.is_connected(index) {
  224. cfg.connect(index);
  225. }
  226. }
  227. cfg.one(thread_rng().gen_range(0..10000), SERVERS, true)?;
  228. cfg.end();
  229. Ok(())
  230. }
/// Drives a cluster through random disconnects, crashes and restarts while
/// three client threads concurrently submit random commands, then verifies
/// that every command a client observed as committed appears in the final
/// agreed-upon log.
fn internal_churn(unreliable: bool) -> config::Result<()> {
    const SERVERS: usize = 5;
    let cfg = Arc::new(make_config!(SERVERS, false));
    defer!(cfg.cleanup());
    if unreliable {
        cfg.begin("Test (2C): unreliable churn");
    } else {
        cfg.begin("Test (2C): churn");
    }
    // Tells the client threads to stop submitting commands.
    let stop = Arc::new(AtomicBool::new(false));
    let mut handles = vec![];
    for client_index in 0..3 {
        let stop = stop.clone();
        let cfg = cfg.clone();
        let logger = test_utils::thread_local_logger::get();
        // Each client repeatedly proposes a random command and records the
        // ones it saw committed.
        let handle = std::thread::spawn(move || {
            test_utils::thread_local_logger::set(logger);
            let mut cmds = vec![];
            while !stop.load(Ordering::SeqCst) {
                let cmd = thread_rng().gen();
                let mut index = None;
                // Offer the command to every live server; remember the last
                // one that accepted it as leader.
                for i in 0..SERVERS {
                    if cfg.is_server_alive(i) {
                        let start = cfg.leader_start(i, cmd);
                        if start.is_some() {
                            index = Some(i);
                        }
                    }
                }
                if let Some(index) = index {
                    // Poll with increasing delays for the command to commit.
                    for millis in [10, 20, 50, 100, 200].iter() {
                        let (cmd_index, cmd_committed) =
                            // somehow the compiler cannot infer the error type.
                            match cfg.committed_count(index) {
                                Ok(t) => t,
                                Err(e) => return Err(e),
                            };
                        #[allow(clippy::collapsible_if)]
                        if cmd_index > 0 {
                            if cmd_committed == cmd {
                                // Record the command only if it is the one
                                // actually committed at this index.
                                cmds.push(cmd);
                            }
                            // The command we started might not get committed:
                            // a different command may win this index instead.
                        }
                        config::sleep_millis(*millis);
                    }
                } else {
                    // No server accepted the command; back off for a
                    // per-client staggered interval before retrying.
                    config::sleep_millis(79 + client_index * 17);
                }
            }
            Ok(cmds)
        });
        handles.push(handle);
    }
    // Churn loop: randomly disconnect, restart/connect, and crash servers.
    for _ in 0..20 {
        // 20% chance: partition a random server from the network.
        if thread_rng().gen_ratio(200, 1000) {
            cfg.disconnect(thread_rng().gen_range(0..SERVERS));
        }
        // 50% chance: ensure a random server is running and connected.
        if thread_rng().gen_ratio(500, 1000) {
            let server = thread_rng().gen_range(0..SERVERS);
            if !cfg.is_server_alive(server) {
                cfg.start1(server)?;
            }
            cfg.connect(server);
        }
        // 20% chance: crash a random live server.
        if thread_rng().gen_ratio(200, 1000) {
            let server = thread_rng().gen_range(0..SERVERS);
            if cfg.is_server_alive(server) {
                cfg.crash1(server);
            }
        }
        config::sleep_millis(config::LONG_ELECTION_TIMEOUT_MILLIS / 10 * 7);
    }
    config::sleep_election_timeouts(1);
    cfg.set_unreliable(false);
    // Bring every server back up and reconnect it before stopping clients.
    for i in 0..SERVERS {
        if !cfg.is_server_alive(i) {
            cfg.start1(i)?;
        }
        cfg.connect(i);
    }
    // Stop the clients and gather every command they saw committed.
    stop.store(true, Ordering::SeqCst);
    let mut all_cmds = vec![];
    for handle in handles {
        let mut cmds = handle.join().expect("Client should not fail")?;
        all_cmds.append(&mut cmds);
    }
    config::sleep_election_timeouts(1);
    // Commit one final command, then read back the entire log.
    let last_cmd_index = cfg.one(thread_rng().gen(), SERVERS, true)?;
    let mut consented = vec![];
    for cmd_index in 1..last_cmd_index + 1 {
        let cmd = cfg.wait(cmd_index, SERVERS, None)?;
        let cmd = cmd.expect("There should always be a command");
        consented.push(cmd);
    }
    // Every command a client observed as committed must be in the log.
    for cmd in all_cmds {
        assert!(
            consented.contains(&cmd),
            "Cmd {} not found in {:?}",
            cmd,
            consented
        );
    }
    cfg.end();
    Ok(())
}
#[test]
fn reliable_churn() -> config::Result<()> {
    // Churn test over a reliable network.
    internal_churn(false)
}
#[test]
fn unreliable_churn() -> config::Result<()> {
    // Churn test over an unreliable (message-dropping) network.
    internal_churn(true)
}