//! Raft persistence tests (Part 2C): state must survive crashes, restarts,
//! partitions, and network churn.
#![allow(clippy::identity_op)]

use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;

use rand::{thread_rng, Rng};
use scopeguard::defer;
use test_configs::utils::{
    sleep_election_timeouts, sleep_millis, LONG_ELECTION_TIMEOUT_MILLIS,
};
use test_configs::{make_config, raft::config};
  11. #[test]
  12. fn persist1() -> config::Result<()> {
  13. const SERVERS: usize = 5;
  14. let cfg = make_config!(SERVERS, false);
  15. defer!(cfg.cleanup());
  16. cfg.begin("Test (2C): basic persistence");
  17. cfg.one(11, SERVERS, true)?;
  18. // crash and re-start all
  19. for i in 0..SERVERS {
  20. cfg.start1(i)?;
  21. }
  22. for i in 0..SERVERS {
  23. cfg.disconnect(i);
  24. cfg.connect(i);
  25. }
  26. cfg.one(12, SERVERS, true)?;
  27. let leader1 = cfg.check_one_leader()?;
  28. cfg.disconnect(leader1);
  29. cfg.start1(leader1)?;
  30. cfg.connect(leader1);
  31. cfg.one(13, SERVERS, true)?;
  32. let leader2 = cfg.check_one_leader()?;
  33. cfg.disconnect(leader2);
  34. cfg.one(14, SERVERS - 1, true)?;
  35. cfg.start1(leader2)?;
  36. cfg.connect(leader2);
  37. // wait for leader2 to join before killing i3
  38. cfg.wait(4, SERVERS, None)?;
  39. let i3 = (cfg.check_one_leader()? + 1) % SERVERS;
  40. cfg.disconnect(i3);
  41. cfg.one(15, SERVERS - 1, true)?;
  42. cfg.start1(i3)?;
  43. cfg.connect(i3);
  44. cfg.one(16, SERVERS, true)?;
  45. cfg.end();
  46. Ok(())
  47. }
  48. #[test]
  49. fn persist2() -> config::Result<()> {
  50. const SERVERS: usize = 5;
  51. let cfg = make_config!(SERVERS, false);
  52. defer!(cfg.cleanup());
  53. cfg.begin("Test (2C): more persistence");
  54. let mut index = 1;
  55. for _ in 0..5 {
  56. cfg.one(10 + index, SERVERS, true)?;
  57. index += 1;
  58. let leader1 = cfg.check_one_leader()?;
  59. cfg.disconnect((leader1 + 1) % SERVERS);
  60. cfg.disconnect((leader1 + 2) % SERVERS);
  61. cfg.one(10 + index, SERVERS - 2, true)?;
  62. index += 1;
  63. cfg.disconnect((leader1 + 0) % SERVERS);
  64. cfg.disconnect((leader1 + 3) % SERVERS);
  65. cfg.disconnect((leader1 + 4) % SERVERS);
  66. cfg.start1((leader1 + 1) % SERVERS)?;
  67. cfg.start1((leader1 + 2) % SERVERS)?;
  68. cfg.connect((leader1 + 1) % SERVERS);
  69. cfg.connect((leader1 + 2) % SERVERS);
  70. sleep_election_timeouts(1);
  71. cfg.start1((leader1 + 3) % SERVERS)?;
  72. cfg.connect((leader1 + 3) % SERVERS);
  73. cfg.one(10 + index, SERVERS - 2, true)?;
  74. index += 1;
  75. cfg.connect((leader1 + 4) % SERVERS);
  76. cfg.connect((leader1 + 0) % SERVERS);
  77. }
  78. cfg.one(1000, SERVERS, true)?;
  79. cfg.end();
  80. Ok(())
  81. }
  82. #[test]
  83. fn persist3() -> config::Result<()> {
  84. const SERVERS: usize = 3;
  85. let cfg = make_config!(SERVERS, false);
  86. defer!(cfg.cleanup());
  87. cfg.begin(
  88. "Test (2C): partitioned leader and one follower crash, leader restarts",
  89. );
  90. cfg.one(101, 3, true)?;
  91. let leader = cfg.check_one_leader()?;
  92. cfg.disconnect((leader + 2) % SERVERS);
  93. cfg.one(102, 2, true)?;
  94. cfg.crash1((leader + 0) % SERVERS);
  95. cfg.crash1((leader + 1) % SERVERS);
  96. cfg.connect((leader + 2) % SERVERS);
  97. cfg.start1((leader + 0) % SERVERS)?;
  98. cfg.connect((leader + 0) % SERVERS);
  99. cfg.one(103, 2, true)?;
  100. cfg.start1((leader + 1) % SERVERS)?;
  101. cfg.connect((leader + 1) % SERVERS);
  102. cfg.one(104, SERVERS, true)?;
  103. cfg.end();
  104. Ok(())
  105. }
  106. #[test]
  107. fn figure8() -> config::Result<()> {
  108. const SERVERS: usize = 5;
  109. let cfg = make_config!(SERVERS, false);
  110. defer!(cfg.cleanup());
  111. cfg.begin("Test (2C): Figure 8");
  112. cfg.one(thread_rng().gen(), 1, true)?;
  113. let mut nup = SERVERS;
  114. for _ in 0..1000 {
  115. let mut leader = None;
  116. for i in 0..SERVERS {
  117. if cfg.is_server_alive(i)
  118. && cfg.leader_start(i, thread_rng().gen()).is_some()
  119. {
  120. leader = Some(i);
  121. }
  122. }
  123. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  124. LONG_ELECTION_TIMEOUT_MILLIS >> 1
  125. } else {
  126. // Magic number 13?
  127. 13
  128. };
  129. let millis = thread_rng().gen_range(0..millis_upper);
  130. sleep_millis(millis);
  131. if let Some(leader) = leader {
  132. cfg.crash1(leader);
  133. nup -= 1;
  134. }
  135. if nup < 3 {
  136. let index = thread_rng().gen_range(0..SERVERS);
  137. if !cfg.is_server_alive(index) {
  138. cfg.start1(index)?;
  139. cfg.connect(index);
  140. nup += 1
  141. }
  142. }
  143. }
  144. for index in 0..SERVERS {
  145. if !cfg.is_server_alive(index) {
  146. cfg.start1(index)?;
  147. cfg.connect(index);
  148. }
  149. }
  150. cfg.one(thread_rng().gen(), SERVERS, true)?;
  151. cfg.end();
  152. Ok(())
  153. }
  154. #[test]
  155. fn unreliable_agree() -> config::Result<()> {
  156. const SERVERS: usize = 5;
  157. let cfg = Arc::new(make_config!(SERVERS, true));
  158. defer!(cfg.cleanup());
  159. cfg.begin("Test (2C): unreliable agreement");
  160. let mut handles = vec![];
  161. for iters in 1..50 {
  162. for j in 0..4 {
  163. let cfg = cfg.clone();
  164. let logger = test_utils::thread_local_logger::get();
  165. let handle = std::thread::spawn(move || {
  166. test_utils::thread_local_logger::set(logger);
  167. cfg.one(100 * iters + j, 1, true)
  168. });
  169. handles.push(handle);
  170. }
  171. cfg.one(iters, 1, true)?;
  172. }
  173. cfg.set_unreliable(false);
  174. for handle in handles {
  175. handle.join().expect("Thread join should not fail")?;
  176. }
  177. cfg.one(100, SERVERS, true)?;
  178. cfg.end();
  179. Ok(())
  180. }
  181. #[test]
  182. fn figure8_unreliable() -> config::Result<()> {
  183. const SERVERS: usize = 5;
  184. let cfg = make_config!(SERVERS, false);
  185. defer!(cfg.cleanup());
  186. cfg.begin("Test (2C): Figure 8 (unreliable)");
  187. cfg.one(thread_rng().gen_range(0..10000), 1, true)?;
  188. let mut nup = SERVERS;
  189. for iters in 0..1000 {
  190. if iters == 200 {
  191. cfg.set_long_reordering(true);
  192. }
  193. let mut leader = None;
  194. for i in 0..SERVERS {
  195. if cfg.is_server_alive(i)
  196. && cfg.leader_start(i, thread_rng().gen()).is_some()
  197. && cfg.is_connected(i)
  198. {
  199. leader = Some(i);
  200. }
  201. }
  202. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  203. LONG_ELECTION_TIMEOUT_MILLIS >> 1
  204. } else {
  205. // Magic number 13?
  206. 13
  207. };
  208. let millis = thread_rng().gen_range(0..millis_upper);
  209. sleep_millis(millis);
  210. if let Some(leader) = leader {
  211. if thread_rng().gen_ratio(1, 2) {
  212. cfg.disconnect(leader);
  213. nup -= 1;
  214. }
  215. }
  216. if nup < 3 {
  217. let index = thread_rng().gen_range(0..SERVERS);
  218. if !cfg.is_connected(index) {
  219. cfg.connect(index);
  220. nup += 1
  221. }
  222. }
  223. }
  224. for index in 0..SERVERS {
  225. if !cfg.is_connected(index) {
  226. cfg.connect(index);
  227. }
  228. }
  229. cfg.one(thread_rng().gen_range(0..10000), SERVERS, true)?;
  230. cfg.end();
  231. Ok(())
  232. }
  233. fn internal_churn(
  234. unreliable: bool,
  235. test_name: &'static str,
  236. ) -> config::Result<()> {
  237. const SERVERS: usize = 5;
  238. let cfg = Arc::new(config::make_config(SERVERS, false, test_name));
  239. defer!(cfg.cleanup());
  240. if unreliable {
  241. cfg.begin("Test (2C): unreliable churn");
  242. } else {
  243. cfg.begin("Test (2C): churn");
  244. }
  245. let stop = Arc::new(AtomicBool::new(false));
  246. let mut handles = vec![];
  247. for client_index in 0..3 {
  248. let stop = stop.clone();
  249. let cfg = cfg.clone();
  250. let logger = test_utils::thread_local_logger::get();
  251. let handle = std::thread::spawn(move || {
  252. test_utils::thread_local_logger::set(logger);
  253. let mut cmds = vec![];
  254. while !stop.load(Ordering::Acquire) {
  255. let cmd = thread_rng().gen();
  256. let mut index = None;
  257. for i in 0..SERVERS {
  258. if cfg.is_server_alive(i) {
  259. let start = cfg.leader_start(i, cmd);
  260. if start.is_some() {
  261. index = Some(i);
  262. }
  263. }
  264. }
  265. if let Some(index) = index {
  266. for millis in [10, 20, 50, 100, 200].iter() {
  267. let (cmd_index, cmd_committed) =
  268. // somehow the compiler cannot infer the error type.
  269. match cfg.committed_count(index) {
  270. Ok(t) => t,
  271. Err(e) => return Err(e),
  272. };
  273. #[allow(clippy::collapsible_if)]
  274. if cmd_index > 0 {
  275. if cmd_committed == cmd {
  276. cmds.push(cmd);
  277. }
  278. // The contract we started might not get
  279. }
  280. sleep_millis(*millis);
  281. }
  282. } else {
  283. sleep_millis(79 + client_index * 17);
  284. }
  285. }
  286. Ok(cmds)
  287. });
  288. handles.push(handle);
  289. }
  290. for _ in 0..20 {
  291. if thread_rng().gen_ratio(200, 1000) {
  292. cfg.disconnect(thread_rng().gen_range(0..SERVERS));
  293. }
  294. if thread_rng().gen_ratio(500, 1000) {
  295. let server = thread_rng().gen_range(0..SERVERS);
  296. if !cfg.is_server_alive(server) {
  297. cfg.start1(server)?;
  298. }
  299. cfg.connect(server);
  300. }
  301. if thread_rng().gen_ratio(200, 1000) {
  302. let server = thread_rng().gen_range(0..SERVERS);
  303. if cfg.is_server_alive(server) {
  304. cfg.crash1(server);
  305. }
  306. }
  307. sleep_millis(LONG_ELECTION_TIMEOUT_MILLIS / 10 * 7);
  308. }
  309. sleep_election_timeouts(1);
  310. cfg.set_unreliable(false);
  311. for i in 0..SERVERS {
  312. if !cfg.is_server_alive(i) {
  313. cfg.start1(i)?;
  314. }
  315. cfg.connect(i);
  316. }
  317. stop.store(true, Ordering::Release);
  318. let mut all_cmds = vec![];
  319. for handle in handles {
  320. let mut cmds = handle.join().expect("Client should not fail")?;
  321. all_cmds.append(&mut cmds);
  322. }
  323. sleep_election_timeouts(1);
  324. let last_cmd_index = cfg.one(thread_rng().gen(), SERVERS, true)?;
  325. let mut consented = vec![];
  326. for cmd_index in 1..last_cmd_index + 1 {
  327. let cmd = cfg.wait(cmd_index, SERVERS, None)?;
  328. let cmd = cmd.expect("There should always be a command");
  329. consented.push(cmd);
  330. }
  331. for cmd in all_cmds {
  332. assert!(
  333. consented.contains(&cmd),
  334. "Cmd {cmd} not found in {consented:?}"
  335. );
  336. }
  337. cfg.end();
  338. Ok(())
  339. }
/// Churn test with the unreliable flag cleared (reliable network).
#[test]
fn reliable_churn() -> config::Result<()> {
    internal_churn(false, stdext::function_name!())
}
/// Churn test with the unreliable flag set.
#[test]
fn unreliable_churn() -> config::Result<()> {
    internal_churn(true, stdext::function_name!())
}