persist_tests.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. #![allow(clippy::identity_op)]
  2. #![allow(clippy::uninlined_format_args)]
  3. use std::sync::atomic::AtomicBool;
  4. use std::sync::atomic::Ordering;
  5. use std::sync::Arc;
  6. use rand::{thread_rng, Rng};
  7. use scopeguard::defer;
  8. use test_configs::utils::{
  9. sleep_election_timeouts, sleep_millis, LONG_ELECTION_TIMEOUT_MILLIS,
  10. };
  11. use test_configs::{make_config, raft::config};
  /// Test (2C): basic persistence.
  ///
  /// Drives a five-server cluster through a fixed script of `start1`,
  /// `disconnect` and `connect` calls, issuing commands 11 through 16 via
  /// `cfg.one` between the steps. Any error reported by the harness is
  /// propagated with `?` and fails the test.
  #[test]
  fn persist1() -> config::Result<()> {
      const SERVERS: usize = 5;
      let cfg = make_config!(SERVERS, false);
      defer!(cfg.cleanup());

      cfg.begin("Test (2C): basic persistence");

      cfg.one(11, SERVERS, true)?;

      // Crash and restart every server, then cycle each server's connection.
      for server in 0..SERVERS {
          cfg.start1(server)?;
      }
      (0..SERVERS).for_each(|server| {
          cfg.disconnect(server);
          cfg.connect(server);
      });

      cfg.one(12, SERVERS, true)?;

      // Restart the current leader while it is disconnected.
      let first_leader = cfg.check_one_leader()?;
      cfg.disconnect(first_leader);
      cfg.start1(first_leader)?;
      cfg.connect(first_leader);

      cfg.one(13, SERVERS, true)?;

      // Take the leader offline, commit without it, then bring it back.
      let second_leader = cfg.check_one_leader()?;
      cfg.disconnect(second_leader);
      cfg.one(14, SERVERS - 1, true)?;
      cfg.start1(second_leader)?;
      cfg.connect(second_leader);

      // Wait for the restarted server to join before taking the next one down.
      cfg.wait(4, SERVERS, None)?;

      // Pick the server right after the current leader.
      let current_leader = cfg.check_one_leader()?;
      let next_server = (current_leader + 1) % SERVERS;
      cfg.disconnect(next_server);
      cfg.one(15, SERVERS - 1, true)?;
      cfg.start1(next_server)?;
      cfg.connect(next_server);

      cfg.one(16, SERVERS, true)?;

      cfg.end();
      Ok(())
  }
  /// Test (2C): more persistence.
  ///
  /// Runs five rounds on a five-server cluster. Each round issues three
  /// consecutive commands (starting at 11 and continuing across rounds) while
  /// servers at fixed offsets from the current leader are disconnected,
  /// restarted with `start1` and reconnected. A final command (1000) is issued
  /// against all servers once the rounds are over.
  #[test]
  fn persist2() -> config::Result<()> {
      const SERVERS: usize = 5;
      let cfg = make_config!(SERVERS, false);
      defer!(cfg.cleanup());

      cfg.begin("Test (2C): more persistence");

      for round in 0..5 {
          // Three commands per round: 11..=13, 14..=16, and so on.
          let first_cmd = 11 + 3 * round;

          cfg.one(first_cmd, SERVERS, true)?;

          let leader = cfg.check_one_leader()?;
          // Server id located `offset` positions after the leader, wrapping.
          let peer = |offset: usize| (leader + offset) % SERVERS;

          cfg.disconnect(peer(1));
          cfg.disconnect(peer(2));

          cfg.one(first_cmd + 1, SERVERS - 2, true)?;

          cfg.disconnect(peer(0));
          cfg.disconnect(peer(3));
          cfg.disconnect(peer(4));

          cfg.start1(peer(1))?;
          cfg.start1(peer(2))?;
          cfg.connect(peer(1));
          cfg.connect(peer(2));

          sleep_election_timeouts(1);

          cfg.start1(peer(3))?;
          cfg.connect(peer(3));

          cfg.one(first_cmd + 2, SERVERS - 2, true)?;

          cfg.connect(peer(4));
          cfg.connect(peer(0));
      }

      cfg.one(1000, SERVERS, true)?;

      cfg.end();
      Ok(())
  }
  /// Test (2C): partitioned leader and one follower crash, leader restarts.
  ///
  /// On a three-server cluster: disconnect one server, commit with the
  /// remaining two, crash those two, reconnect the third, then restart the
  /// crashed servers one at a time, issuing a command after each restart.
  #[test]
  fn persist3() -> config::Result<()> {
      const SERVERS: usize = 3;
      let cfg = make_config!(SERVERS, false);
      defer!(cfg.cleanup());

      cfg.begin(
          "Test (2C): partitioned leader and one follower crash, leader restarts",
      );

      cfg.one(101, 3, true)?;

      let leader = cfg.check_one_leader()?;
      // Server ids expressed relative to the leader found above.
      let at = |offset: usize| (leader + offset) % SERVERS;

      cfg.disconnect(at(2));

      cfg.one(102, 2, true)?;

      cfg.crash1(at(0));
      cfg.crash1(at(1));
      cfg.connect(at(2));
      cfg.start1(at(0))?;
      cfg.connect(at(0));

      cfg.one(103, 2, true)?;

      cfg.start1(at(1))?;
      cfg.connect(at(1));

      cfg.one(104, SERVERS, true)?;

      cfg.end();
      Ok(())
  }
  /// Test (2C): Figure 8.
  ///
  /// For 1000 iterations: offer a random command to every live server, sleep
  /// for a random duration, crash the last server that accepted the command,
  /// and — when fewer than three servers are believed up — possibly restart one
  /// randomly chosen server. Afterwards every dead server is restarted and one
  /// final random command is issued against the whole cluster.
  #[test]
  fn figure8() -> config::Result<()> {
      const SERVERS: usize = 5;
      let cfg = make_config!(SERVERS, false);
      defer!(cfg.cleanup());

      cfg.begin("Test (2C): Figure 8");

      cfg.one(thread_rng().gen(), 1, true)?;

      // Number of servers this test believes to be running.
      let mut live_count = SERVERS;
      for _ in 0..1000 {
          // Offer a command to every live server; remember the last one that
          // accepted it.
          let mut accepted_by = None;
          for server in 0..SERVERS {
              if !cfg.is_server_alive(server) {
                  continue;
              }
              if cfg.leader_start(server, thread_rng().gen()).is_some() {
                  accepted_by = Some(server);
              }
          }

          // One time in ten use a long pause (up to half the long election
          // timeout); otherwise a short one.
          let long_pause = thread_rng().gen_ratio(100, 1000);
          let millis_upper = if long_pause {
              LONG_ELECTION_TIMEOUT_MILLIS >> 1
          } else {
              // Magic number 13?
              13
          };
          sleep_millis(thread_rng().gen_range(0..millis_upper));

          if let Some(server) = accepted_by {
              cfg.crash1(server);
              live_count -= 1;
          }

          if live_count < 3 {
              let candidate = thread_rng().gen_range(0..SERVERS);
              if !cfg.is_server_alive(candidate) {
                  cfg.start1(candidate)?;
                  cfg.connect(candidate);
                  live_count += 1;
              }
          }
      }

      // Bring back every server that is still down.
      for server in 0..SERVERS {
          if cfg.is_server_alive(server) {
              continue;
          }
          cfg.start1(server)?;
          cfg.connect(server);
      }

      cfg.one(thread_rng().gen(), SERVERS, true)?;

      cfg.end();
      Ok(())
  }
  /// Test (2C): unreliable agreement.
  ///
  /// Builds the cluster with the unreliable flag set, then for each of 49
  /// iterations spawns four threads that each issue one command, plus one
  /// command issued from the test thread. The network is switched back to
  /// reliable before the spawned threads are joined, and a final command is
  /// issued against all servers.
  #[test]
  fn unreliable_agree() -> config::Result<()> {
      const SERVERS: usize = 5;
      let cfg = Arc::new(make_config!(SERVERS, true));
      defer!(cfg.cleanup());

      cfg.begin("Test (2C): unreliable agreement");

      let mut workers = Vec::new();
      for iters in 1..50 {
          for j in 0..4 {
              let worker_cfg = Arc::clone(&cfg);
              // Hand the current thread's logger over to the spawned thread.
              let logger = test_utils::thread_local_logger::get();
              workers.push(std::thread::spawn(move || {
                  test_utils::thread_local_logger::set(logger);
                  worker_cfg.one(100 * iters + j, 1, true)
              }));
          }
          cfg.one(iters, 1, true)?;
      }

      cfg.set_unreliable(false);

      // A panic in a worker aborts the test; an error result is propagated.
      for worker in workers {
          worker.join().expect("Thread join should not fail")?;
      }

      cfg.one(100, SERVERS, true)?;

      cfg.end();
      Ok(())
  }
  /// Test (2C): Figure 8 (unreliable).
  ///
  /// Same shape as the Figure 8 test, but servers are disconnected and
  /// reconnected instead of crashed and restarted, and long reordering is
  /// switched on from iteration 200 onwards. After 1000 iterations every
  /// disconnected server is reconnected and one final random command is issued
  /// against the whole cluster.
  #[test]
  fn figure8_unreliable() -> config::Result<()> {
      const SERVERS: usize = 5;
      // The second argument is the "unreliable" flag (the unreliable agreement
      // test passes `true` and later clears it with `set_unreliable(false)`).
      // It used to be `false` here, so this test never ran on an unreliable
      // network despite its name.
      let cfg = make_config!(SERVERS, true);
      defer!(cfg.cleanup());

      cfg.begin("Test (2C): Figure 8 (unreliable)");

      cfg.one(thread_rng().gen_range(0..10000), 1, true)?;

      // Number of servers this test believes to be connected.
      let mut nup = SERVERS;
      for iters in 0..1000 {
          if iters == 200 {
              cfg.set_long_reordering(true);
          }

          // Offer a command to every live server; remember the last one that
          // accepted it and is currently connected.
          let mut leader = None;
          for i in 0..SERVERS {
              if cfg.is_server_alive(i)
                  && cfg.leader_start(i, thread_rng().gen()).is_some()
                  && cfg.is_connected(i)
              {
                  leader = Some(i);
              }
          }

          // One time in ten use a long pause (up to half the long election
          // timeout); otherwise a short one.
          let millis_upper = if thread_rng().gen_ratio(100, 1000) {
              LONG_ELECTION_TIMEOUT_MILLIS >> 1
          } else {
              // Magic number 13?
              13
          };
          let millis = thread_rng().gen_range(0..millis_upper);
          sleep_millis(millis);

          // Disconnect the accepting server half of the time.
          if let Some(leader) = leader {
              if thread_rng().gen_ratio(1, 2) {
                  cfg.disconnect(leader);
                  nup -= 1;
              }
          }

          // Below three connected servers: try to reconnect a random one.
          if nup < 3 {
              let index = thread_rng().gen_range(0..SERVERS);
              if !cfg.is_connected(index) {
                  cfg.connect(index);
                  nup += 1
              }
          }
      }

      // Reconnect everything before the final agreement check.
      for index in 0..SERVERS {
          if !cfg.is_connected(index) {
              cfg.connect(index);
          }
      }

      cfg.one(thread_rng().gen_range(0..10000), SERVERS, true)?;

      cfg.end();
      Ok(())
  }
  234. fn internal_churn(
  235. unreliable: bool,
  236. test_name: &'static str,
  237. ) -> config::Result<()> {
  238. const SERVERS: usize = 5;
  239. let cfg = Arc::new(config::make_config(SERVERS, false, test_name));
  240. defer!(cfg.cleanup());
  241. if unreliable {
  242. cfg.begin("Test (2C): unreliable churn");
  243. } else {
  244. cfg.begin("Test (2C): churn");
  245. }
  246. let stop = Arc::new(AtomicBool::new(false));
  247. let mut handles = vec![];
  248. for client_index in 0..3 {
  249. let stop = stop.clone();
  250. let cfg = cfg.clone();
  251. let logger = test_utils::thread_local_logger::get();
  252. let handle = std::thread::spawn(move || {
  253. test_utils::thread_local_logger::set(logger);
  254. let mut cmds = vec![];
  255. while !stop.load(Ordering::Acquire) {
  256. let cmd = thread_rng().gen();
  257. let mut index = None;
  258. for i in 0..SERVERS {
  259. if cfg.is_server_alive(i) {
  260. let start = cfg.leader_start(i, cmd);
  261. if start.is_some() {
  262. index = Some(i);
  263. }
  264. }
  265. }
  266. if let Some(index) = index {
  267. for millis in [10, 20, 50, 100, 200].iter() {
  268. let (cmd_index, cmd_committed) =
  269. // somehow the compiler cannot infer the error type.
  270. match cfg.committed_count(index) {
  271. Ok(t) => t,
  272. Err(e) => return Err(e),
  273. };
  274. #[allow(clippy::collapsible_if)]
  275. if cmd_index > 0 {
  276. if cmd_committed == cmd {
  277. cmds.push(cmd);
  278. }
  279. // The contract we started might not get
  280. }
  281. sleep_millis(*millis);
  282. }
  283. } else {
  284. sleep_millis(79 + client_index * 17);
  285. }
  286. }
  287. Ok(cmds)
  288. });
  289. handles.push(handle);
  290. }
  291. for _ in 0..20 {
  292. if thread_rng().gen_ratio(200, 1000) {
  293. cfg.disconnect(thread_rng().gen_range(0..SERVERS));
  294. }
  295. if thread_rng().gen_ratio(500, 1000) {
  296. let server = thread_rng().gen_range(0..SERVERS);
  297. if !cfg.is_server_alive(server) {
  298. cfg.start1(server)?;
  299. }
  300. cfg.connect(server);
  301. }
  302. if thread_rng().gen_ratio(200, 1000) {
  303. let server = thread_rng().gen_range(0..SERVERS);
  304. if cfg.is_server_alive(server) {
  305. cfg.crash1(server);
  306. }
  307. }
  308. sleep_millis(LONG_ELECTION_TIMEOUT_MILLIS / 10 * 7);
  309. }
  310. sleep_election_timeouts(1);
  311. cfg.set_unreliable(false);
  312. for i in 0..SERVERS {
  313. if !cfg.is_server_alive(i) {
  314. cfg.start1(i)?;
  315. }
  316. cfg.connect(i);
  317. }
  318. stop.store(true, Ordering::Release);
  319. let mut all_cmds = vec![];
  320. for handle in handles {
  321. let mut cmds = handle.join().expect("Client should not fail")?;
  322. all_cmds.append(&mut cmds);
  323. }
  324. sleep_election_timeouts(1);
  325. let last_cmd_index = cfg.one(thread_rng().gen(), SERVERS, true)?;
  326. let mut consented = vec![];
  327. for cmd_index in 1..last_cmd_index + 1 {
  328. let cmd = cfg.wait(cmd_index, SERVERS, None)?;
  329. let cmd = cmd.expect("There should always be a command");
  330. consented.push(cmd);
  331. }
  332. for cmd in all_cmds {
  333. assert!(
  334. consented.contains(&cmd),
  335. "Cmd {} not found in {:?}",
  336. cmd,
  337. consented
  338. );
  339. }
  340. cfg.end();
  341. Ok(())
  342. }
  /// Runs the shared churn scenario with the `unreliable` flag cleared,
  /// passing this function's own name as the test name.
  #[test]
  fn reliable_churn() -> config::Result<()> {
      let test_name = stdext::function_name!();
      internal_churn(false, test_name)
  }
  /// Runs the shared churn scenario with the `unreliable` flag set,
  /// passing this function's own name as the test name.
  #[test]
  fn unreliable_churn() -> config::Result<()> {
      let test_name = stdext::function_name!();
      internal_churn(true, test_name)
  }