// persist_tests.rs — Raft persistence (2C) integration tests.
  1. #[macro_use]
  2. extern crate anyhow;
  3. extern crate bytes;
  4. extern crate labrpc;
  5. extern crate ruaft;
  6. use std::sync::atomic::AtomicBool;
  7. use std::sync::atomic::Ordering;
  8. use std::sync::Arc;
  9. use rand::{thread_rng, Rng};
  10. mod config;
  11. #[test]
  12. fn persist() -> config::Result<()> {
  13. const SERVERS: usize = 5;
  14. let cfg = config::make_config(SERVERS, false);
  15. let _guard = cfg.deferred_cleanup();
  16. cfg.begin("Test (2C): basic persistence");
  17. cfg.one(11, SERVERS, true)?;
  18. // crash and re-start all
  19. for i in 0..SERVERS {
  20. cfg.start1(i)?;
  21. }
  22. for i in 0..SERVERS {
  23. cfg.disconnect(i);
  24. cfg.connect(i);
  25. }
  26. cfg.one(12, SERVERS, true)?;
  27. let leader1 = cfg.check_one_leader()?;
  28. cfg.disconnect(leader1);
  29. cfg.start1(leader1)?;
  30. cfg.connect(leader1);
  31. cfg.one(13, SERVERS, true)?;
  32. let leader2 = cfg.check_one_leader()?;
  33. cfg.disconnect(leader2);
  34. cfg.one(14, SERVERS - 1, true)?;
  35. cfg.start1(leader2)?;
  36. cfg.connect(leader2);
  37. // wait for leader2 to join before killing i3
  38. cfg.wait(4, SERVERS, None)?;
  39. let i3 = (cfg.check_one_leader()? + 1) % SERVERS;
  40. cfg.disconnect(i3);
  41. cfg.one(15, SERVERS - 1, true)?;
  42. cfg.start1(i3)?;
  43. cfg.connect(i3);
  44. cfg.one(16, SERVERS, true)?;
  45. cfg.end();
  46. drop(_guard);
  47. Ok(())
  48. }
  49. #[test]
  50. fn persist2() -> config::Result<()> {
  51. const SERVERS: usize = 5;
  52. let cfg = config::make_config(SERVERS, false);
  53. let _guard = cfg.deferred_cleanup();
  54. cfg.begin("Test (2C): more persistence");
  55. let mut index = 1;
  56. for _ in 0..5 {
  57. cfg.one(10 + index, SERVERS, true)?;
  58. index += 1;
  59. let leader1 = cfg.check_one_leader()?;
  60. cfg.disconnect((leader1 + 1) % SERVERS);
  61. cfg.disconnect((leader1 + 2) % SERVERS);
  62. cfg.one(10 + index, SERVERS - 2, true)?;
  63. index += 1;
  64. cfg.disconnect((leader1 + 0) % SERVERS);
  65. cfg.disconnect((leader1 + 3) % SERVERS);
  66. cfg.disconnect((leader1 + 4) % SERVERS);
  67. cfg.start1((leader1 + 1) % SERVERS)?;
  68. cfg.start1((leader1 + 2) % SERVERS)?;
  69. cfg.connect((leader1 + 1) % SERVERS);
  70. cfg.connect((leader1 + 2) % SERVERS);
  71. config::sleep_election_timeouts(1);
  72. cfg.start1((leader1 + 3) % SERVERS)?;
  73. cfg.connect((leader1 + 3) % SERVERS);
  74. cfg.one(10 + index, SERVERS - 2, true)?;
  75. index += 1;
  76. cfg.connect((leader1 + 4) % SERVERS);
  77. cfg.connect((leader1 + 0) % SERVERS);
  78. }
  79. cfg.one(1000, SERVERS, true)?;
  80. cfg.end();
  81. drop(_guard);
  82. Ok(())
  83. }
  84. #[test]
  85. fn persist3() -> config::Result<()> {
  86. const SERVERS: usize = 3;
  87. let cfg = config::make_config(SERVERS, false);
  88. let _guard = cfg.deferred_cleanup();
  89. cfg.begin(
  90. "Test (2C): partitioned leader and one follower crash, leader restarts",
  91. );
  92. cfg.one(101, 3, true)?;
  93. let leader = cfg.check_one_leader()?;
  94. cfg.disconnect((leader + 2) % SERVERS);
  95. cfg.one(102, 2, true)?;
  96. cfg.crash1((leader + 0) % SERVERS);
  97. cfg.crash1((leader + 1) % SERVERS);
  98. cfg.connect((leader + 2) % SERVERS);
  99. cfg.start1((leader + 0) % SERVERS)?;
  100. cfg.connect((leader + 0) % SERVERS);
  101. cfg.one(103, 2, true)?;
  102. cfg.start1((leader + 1) % SERVERS)?;
  103. cfg.connect((leader + 1) % SERVERS);
  104. cfg.one(104, SERVERS, true)?;
  105. drop(_guard);
  106. Ok(())
  107. }
  108. #[test]
  109. fn figure8() -> config::Result<()> {
  110. const SERVERS: usize = 5;
  111. let cfg = config::make_config(SERVERS, false);
  112. let _guard = cfg.deferred_cleanup();
  113. cfg.begin("Test (2C): Figure 8");
  114. cfg.one(thread_rng().gen(), 1, true)?;
  115. let mut nup = SERVERS;
  116. for _ in 0..1000 {
  117. let mut leader = None;
  118. for i in 0..SERVERS {
  119. if cfg.is_server_alive(i) {
  120. if let Some(_) = cfg.leader_start(i, thread_rng().gen()) {
  121. leader = Some(i);
  122. }
  123. }
  124. }
  125. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  126. config::LONG_ELECTION_TIMEOUT_MILLIS >> 1
  127. } else {
  128. // Magic number 13?
  129. 13
  130. };
  131. let millis = thread_rng().gen_range(0, millis_upper);
  132. config::sleep_millis(millis);
  133. if let Some(leader) = leader {
  134. cfg.crash1(leader);
  135. nup -= 1;
  136. }
  137. if nup < 3 {
  138. let index = thread_rng().gen_range(0, SERVERS);
  139. if !cfg.is_server_alive(index) {
  140. cfg.start1(index)?;
  141. cfg.connect(index);
  142. nup += 1
  143. }
  144. }
  145. }
  146. for index in 0..SERVERS {
  147. if !cfg.is_server_alive(index) {
  148. cfg.start1(index)?;
  149. cfg.connect(index);
  150. }
  151. }
  152. cfg.one(thread_rng().gen(), SERVERS, true)?;
  153. cfg.end();
  154. drop(_guard);
  155. Ok(())
  156. }
  157. #[test]
  158. fn unreliable_agree() -> config::Result<()> {
  159. const SERVERS: usize = 5;
  160. let cfg = Arc::new(config::make_config(SERVERS, true));
  161. let guard_cfg = cfg.clone();
  162. let _guard = guard_cfg.deferred_cleanup();
  163. cfg.begin("Test (2C): unreliable agreement");
  164. let mut handles = vec![];
  165. let cfg = Arc::new(cfg);
  166. for iters in 1..50 {
  167. for j in 0..4 {
  168. let cfg = cfg.clone();
  169. let handle =
  170. std::thread::spawn(move || cfg.one(100 * iters + j, 1, true));
  171. handles.push(handle);
  172. }
  173. cfg.one(iters, 1, true)?;
  174. }
  175. cfg.set_unreliable(false);
  176. for handle in handles {
  177. handle.join().expect("Thread join should not fail")?;
  178. }
  179. cfg.one(100, SERVERS, true)?;
  180. cfg.end();
  181. drop(_guard);
  182. Ok(())
  183. }
  184. #[test]
  185. fn figure8_unreliable() -> config::Result<()> {
  186. const SERVERS: usize = 5;
  187. let cfg = config::make_config(SERVERS, false);
  188. let _guard = cfg.deferred_cleanup();
  189. cfg.begin("Test (2C): Figure 8 (unreliable)");
  190. cfg.one(thread_rng().gen_range(0, 10000), 1, true)?;
  191. let mut nup = SERVERS;
  192. for iters in 0..1000 {
  193. if iters == 200 {
  194. cfg.set_long_reordering(true);
  195. }
  196. let mut leader = None;
  197. for i in 0..SERVERS {
  198. if cfg.is_server_alive(i) {
  199. if let Some(_) = cfg.leader_start(i, thread_rng().gen()) {
  200. if cfg.is_connected(i) {
  201. leader = Some(i);
  202. }
  203. }
  204. }
  205. }
  206. let millis_upper = if thread_rng().gen_ratio(100, 1000) {
  207. config::LONG_ELECTION_TIMEOUT_MILLIS >> 1
  208. } else {
  209. // Magic number 13?
  210. 13
  211. };
  212. let millis = thread_rng().gen_range(0, millis_upper);
  213. config::sleep_millis(millis);
  214. if let Some(leader) = leader {
  215. if thread_rng().gen_ratio(1, 2) {
  216. cfg.disconnect(leader);
  217. nup -= 1;
  218. }
  219. }
  220. if nup < 3 {
  221. let index = thread_rng().gen_range(0, SERVERS);
  222. if !cfg.is_connected(index) {
  223. cfg.connect(index);
  224. nup += 1
  225. }
  226. }
  227. }
  228. for index in 0..SERVERS {
  229. if !cfg.is_connected(index) {
  230. cfg.connect(index);
  231. }
  232. }
  233. cfg.one(thread_rng().gen_range(0, 10000), SERVERS, true)?;
  234. drop(_guard);
  235. Ok(())
  236. }
  237. fn internal_churn(unreliable: bool) -> config::Result<()> {
  238. const SERVERS: usize = 5;
  239. let cfg = Arc::new(config::make_config(SERVERS, false));
  240. let cfg_clone = cfg.clone();
  241. let _guard = cfg_clone.deferred_cleanup();
  242. if unreliable {
  243. cfg.begin("Test (2C): unreliable churn");
  244. } else {
  245. cfg.begin("Test (2C): churn");
  246. }
  247. let stop = Arc::new(AtomicBool::new(false));
  248. let mut handles = vec![];
  249. for client_index in 0..3 {
  250. let stop = stop.clone();
  251. let cfg = cfg.clone();
  252. let handle = std::thread::spawn(move || {
  253. let mut cmds = vec![];
  254. while !stop.load(Ordering::SeqCst) {
  255. let cmd = thread_rng().gen();
  256. let mut index = None;
  257. for i in 0..SERVERS {
  258. if cfg.is_server_alive(i) {
  259. let start = cfg.leader_start(i, cmd);
  260. if start.is_some() {
  261. index = Some(i);
  262. }
  263. }
  264. }
  265. if let Some(index) = index {
  266. for millis in [10, 20, 50, 100, 200].iter() {
  267. let (cmd_index, cmd_committed) =
  268. // somehow the compiler cannot infer the error type.
  269. match cfg.committed_count(index) {
  270. Ok(t) => t,
  271. Err(e) => return Err(e),
  272. };
  273. if cmd_index > 0 {
  274. if cmd_committed == cmd {
  275. cmds.push(cmd);
  276. }
  277. // The contract we started might not get
  278. }
  279. config::sleep_millis(*millis);
  280. }
  281. } else {
  282. config::sleep_millis(79 + client_index * 17);
  283. }
  284. }
  285. Ok(cmds)
  286. });
  287. handles.push(handle);
  288. }
  289. for _ in 0..20 {
  290. if thread_rng().gen_ratio(200, 1000) {
  291. cfg.disconnect(thread_rng().gen_range(0, SERVERS));
  292. }
  293. if thread_rng().gen_ratio(500, 1000) {
  294. let server = thread_rng().gen_range(0, SERVERS);
  295. if !cfg.is_server_alive(server) {
  296. cfg.start1(server)?;
  297. }
  298. cfg.connect(server);
  299. }
  300. if thread_rng().gen_ratio(200, 1000) {
  301. let server = thread_rng().gen_range(0, SERVERS);
  302. if cfg.is_server_alive(server) {
  303. cfg.crash1(server);
  304. }
  305. }
  306. config::sleep_millis(config::LONG_ELECTION_TIMEOUT_MILLIS / 10 * 7);
  307. }
  308. config::sleep_election_timeouts(1);
  309. cfg.set_unreliable(false);
  310. for i in 0..SERVERS {
  311. if !cfg.is_server_alive(i) {
  312. cfg.start1(i)?;
  313. }
  314. cfg.connect(i);
  315. }
  316. stop.store(true, Ordering::SeqCst);
  317. let mut all_cmds = vec![];
  318. for handle in handles {
  319. let mut cmds = handle.join().expect("Client should not fail")?;
  320. all_cmds.append(&mut cmds);
  321. }
  322. config::sleep_election_timeouts(1);
  323. let last_cmd_index = cfg.one(thread_rng().gen(), SERVERS, true)?;
  324. let mut consented = vec![];
  325. for cmd_index in 1..last_cmd_index + 1 {
  326. let cmd = cfg.wait(cmd_index, SERVERS, None)?;
  327. let cmd = cmd.expect("There should always be a command");
  328. consented.push(cmd);
  329. }
  330. for cmd in all_cmds {
  331. assert!(
  332. consented.contains(&cmd),
  333. "Cmd {} not found in {:?}",
  334. cmd,
  335. consented
  336. );
  337. }
  338. cfg.end();
  339. drop(_guard);
  340. Ok(())
  341. }
/// Churn test over a reliable network.
#[test]
fn reliable_churn() -> config::Result<()> {
    internal_churn(false)
}
/// Churn test over an unreliable network.
#[test]
fn unreliable_churn() -> config::Result<()> {
    internal_churn(true)
}