service_test.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. #[macro_use]
  2. extern crate anyhow;
  3. extern crate kvraft;
  4. extern crate rand;
  5. #[macro_use]
  6. extern crate scopeguard;
  7. use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
  8. use std::sync::Arc;
  9. use std::thread::JoinHandle;
  10. use std::time::Duration;
  11. use anyhow::Context;
  12. use rand::{thread_rng, Rng};
  13. use kvraft::testing_utils::config::{
  14. make_config, sleep_election_timeouts, sleep_millis, Config,
  15. LONG_ELECTION_TIMEOUT_MILLIS,
  16. };
  17. use kvraft::Clerk;
  18. fn spawn_clients<T, Func>(
  19. config: Arc<Config>,
  20. clients: usize,
  21. func: Func,
  22. ) -> Vec<JoinHandle<T>>
  23. where
  24. T: 'static + Send,
  25. Func: 'static + Clone + Send + Sync + Fn(usize, Clerk) -> T,
  26. {
  27. let mut client_threads = vec![];
  28. for i in 0..clients {
  29. let clerk = config.make_clerk();
  30. let func = func.clone();
  31. client_threads.push(std::thread::spawn(move || func(i, clerk)))
  32. }
  33. eprintln!("spawning clients done.");
  34. client_threads
  35. }
  36. fn appending_client(
  37. index: usize,
  38. mut clerk: Clerk,
  39. stop: Arc<AtomicBool>,
  40. ) -> (usize, String) {
  41. eprintln!("client {} running.", index);
  42. let mut op_count = 0usize;
  43. let key = index.to_string();
  44. let mut last = String::new();
  45. let mut rng = thread_rng();
  46. clerk.put(&key, &last);
  47. while !stop.load(Ordering::Acquire) {
  48. eprintln!("client {} starting {}.", index, op_count);
  49. if rng.gen_ratio(1, 2) {
  50. let value = format!("({}, {}), ", index, op_count);
  51. last.push_str(&value);
  52. clerk.append(&key, &value);
  53. op_count += 1;
  54. } else {
  55. let value = clerk
  56. .get(&key)
  57. .expect(&format!("Key {} should exist.", index));
  58. assert_eq!(value, last);
  59. }
  60. eprintln!("client {} done {}.", index, op_count);
  61. }
  62. eprintln!("client {} done.", index);
  63. (op_count, last)
  64. }
  65. const PARTITION_MAX_DELAY_MILLIS: u64 = 200;
  66. fn run_partition(cfg: Arc<Config>, stop: Arc<AtomicBool>) {
  67. while !stop.load(Ordering::Acquire) {
  68. cfg.random_partition();
  69. let delay = thread_rng().gen_range(
  70. LONG_ELECTION_TIMEOUT_MILLIS
  71. ..LONG_ELECTION_TIMEOUT_MILLIS + PARTITION_MAX_DELAY_MILLIS,
  72. );
  73. std::thread::sleep(Duration::from_millis(delay));
  74. }
  75. }
  /// Knobs for `generic_test`. The derived `Default` gives zero clients, all
  /// flags off and no `maxraftstate`.
  #[derive(Default)]
  struct GenericTestParams {
      /// Number of client threads spawned in every round.
      clients: usize,
      /// Forwarded to `make_config` as its second argument.
      unreliable: bool,
      /// When set, a background thread runs `run_partition` during each round.
      partition: bool,
      /// When set, all servers are crashed and restarted once per round.
      crash: bool,
      /// Forwarded to `make_config`; `None` is passed on as `usize::MAX`.
      maxraftstate: Option<usize>,
  }
  84. fn generic_test(test_params: GenericTestParams) {
  85. let GenericTestParams {
  86. clients,
  87. unreliable,
  88. partition,
  89. crash,
  90. maxraftstate,
  91. } = test_params;
  92. let maxraftstate = maxraftstate.unwrap_or(usize::MAX);
  93. const SERVERS: usize = 5;
  94. let cfg = Arc::new(make_config(SERVERS, unreliable, maxraftstate));
  95. defer!(cfg.clean_up());
  96. cfg.begin("");
  97. let mut clerk = cfg.make_clerk();
  98. const ROUNDS: usize = 3;
  99. for _ in 0..ROUNDS {
  100. // Network partition thread.
  101. let partition_stop = Arc::new(AtomicBool::new(false));
  102. // KV server clients.
  103. let clients_stop = Arc::new(AtomicBool::new(false));
  104. let config = cfg.clone();
  105. let clients_stop_clone = clients_stop.clone();
  106. let spawn_client_results = std::thread::spawn(move || {
  107. spawn_clients(config, clients, move |index: usize, clerk: Clerk| {
  108. appending_client(index, clerk, clients_stop_clone.clone())
  109. })
  110. });
  111. let partition_result = if partition {
  112. // Let the clients perform some operations without interruption.
  113. sleep_millis(1000);
  114. let config = cfg.clone();
  115. let partition_stop_clone = partition_stop.clone();
  116. Some(std::thread::spawn(|| {
  117. run_partition(config, partition_stop_clone)
  118. }))
  119. } else {
  120. None
  121. };
  122. if crash {
  123. cfg.crash_all();
  124. sleep_election_timeouts(1);
  125. cfg.restart_all();
  126. }
  127. std::thread::sleep(Duration::from_secs(5));
  128. // Stop partitions.
  129. partition_stop.store(true, Ordering::Release);
  130. partition_result.map(|result| {
  131. result.join().expect("Partition thread should never fail");
  132. cfg.connect_all();
  133. sleep_election_timeouts(1);
  134. });
  135. // Tell all clients to stop.
  136. clients_stop.store(true, Ordering::Release);
  137. let client_results = spawn_client_results
  138. .join()
  139. .expect("Spawning clients should never fail.");
  140. for (index, client_result) in client_results.into_iter().enumerate() {
  141. let (op_count, last_result) =
  142. client_result.join().expect("Client should never fail.");
  143. let real_result = clerk
  144. .get(index.to_string())
  145. .expect(&format!("Key {} should exist.", index));
  146. assert_eq!(real_result, last_result);
  147. eprintln!("Client {} committed {} operations", index, op_count);
  148. assert!(op_count > 10, "Client committed less than 10 operations");
  149. }
  150. }
  151. cfg.end();
  152. }
  153. fn check_concurrent_results(
  154. value: String,
  155. clients: usize,
  156. expected: Vec<usize>,
  157. ) -> anyhow::Result<()> {
  158. if !value.starts_with('(') || !value.ends_with(')') {
  159. bail!("Malformed value string {}", value)
  160. }
  161. let inner_value = &value[1..value.len() - 1];
  162. let mut progress = vec![0; clients];
  163. for pair_str in inner_value.split(")(") {
  164. let mut nums = vec![];
  165. for num_str in pair_str.split(", ") {
  166. let num: usize = num_str.parse().context(format!(
  167. "Parsing '{:?}' failed within '{:?}'",
  168. num_str, value,
  169. ))?;
  170. nums.push(num);
  171. }
  172. if nums.len() != 2 {
  173. bail!(
  174. concat!(
  175. "More than two numbers in the same group when",
  176. " parsing '{:?}' failed within '{:?}'",
  177. ),
  178. pair_str,
  179. value,
  180. );
  181. }
  182. let (client, curr) = (nums[0], nums[1]);
  183. if progress[client] != curr {
  184. bail!(
  185. "Client {} failed, expecting {}, got {}, others are {:?} in {}",
  186. client,
  187. progress[client],
  188. curr,
  189. progress,
  190. value,
  191. )
  192. }
  193. progress[client] = curr + 1;
  194. }
  195. assert_eq!(progress, expected, "Expecting progress in {}", value);
  196. Ok(())
  197. }
  /// Generic scenario with a single client and every other option at its
  /// default.
  #[test]
  fn basic_service() {
      let params = GenericTestParams {
          clients: 1,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with five concurrent clients and every other option at
  /// its default.
  #[test]
  fn concurrent_client() {
      let params = GenericTestParams {
          clients: 5,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with five clients and the `unreliable` flag set.
  #[test]
  fn unreliable_many_clients() {
      let params = GenericTestParams {
          clients: 5,
          unreliable: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Five clients each append ten `(client, seq)` groups to the shared key
  /// `"k"` on a cluster configured as unreliable; the final value is then
  /// validated with `check_concurrent_results`.
  #[test]
  fn unreliable_one_key_many_clients() -> anyhow::Result<()> {
      const SERVERS: usize = 5;
      const CLIENTS: usize = 5;
      const ATTEMPTS: usize = 10;

      let cfg = Arc::new(make_config(SERVERS, true, 0));
      defer!(cfg.clean_up());
      let mut clerk = cfg.make_clerk();

      cfg.begin("Test: concurrent append to same key, unreliable (3A)");
      clerk.put("k", "");

      let workers = spawn_clients(cfg.clone(), CLIENTS, |index, mut clerk| {
          for attempt in 0..ATTEMPTS {
              clerk.append("k", format!("({}, {})", index, attempt));
          }
      });
      // Wait for every client before reading the combined value.
      workers.into_iter().for_each(|worker| {
          worker.join().expect("Client should never fail");
      });

      let final_value = clerk.get("k").expect("Key should exist");
      check_concurrent_results(final_value, CLIENTS, vec![ATTEMPTS; CLIENTS])
  }
  /// Splits the cluster into a majority and a minority and checks three
  /// things in turn: a clerk limited to the majority can still write and
  /// read; a put and a get issued through minority clerks do not complete
  /// within one second; and after the network is reconnected both complete,
  /// leaving the key at the minority's value `"15"`.
  #[test]
  fn one_partition() -> anyhow::Result<()> {
      const SERVERS: usize = 5;
      const KEY: &str = "1";

      let cfg = Arc::new(make_config(SERVERS, false, 0));
      defer!(cfg.clean_up());

      cfg.begin("Test: progress in majority (3A)");
      let mut clerk = cfg.make_clerk();
      clerk.put(KEY, "13");

      let (majority, minority) = cfg.partition();
      assert!(minority.len() < majority.len());
      assert_eq!(minority.len() + majority.len(), SERVERS);

      let mut majority_clerk = cfg.make_limited_clerk(&majority);
      let mut minority_writer = cfg.make_limited_clerk(&minority);
      let mut minority_reader = cfg.make_limited_clerk(&minority);

      majority_clerk.put(KEY, "14");
      assert_eq!(majority_clerk.get(KEY), Some("14".to_owned()));

      cfg.begin("Test: no progress in minority (3A)");
      // Bit 0 is set once the minority put returns, bit 1 once the minority
      // get returns; the value 3 therefore means both have completed.
      let counter = Arc::new(AtomicUsize::new(0));
      {
          let put_done = Arc::clone(&counter);
          std::thread::spawn(move || {
              minority_writer.put(KEY, "15");
              put_done.fetch_or(1, Ordering::SeqCst);
          });
      }
      {
          let get_done = Arc::clone(&counter);
          std::thread::spawn(move || {
              minority_reader.get(KEY);
              get_done.fetch_or(2, Ordering::SeqCst);
          });
      }

      sleep_millis(1000);
      assert_eq!(counter.load(Ordering::SeqCst), 0);

      assert_eq!(majority_clerk.get(KEY), Some("14".to_owned()));
      majority_clerk.put(KEY, "16");
      assert_eq!(majority_clerk.get(KEY), Some("16".to_owned()));

      cfg.begin("Test: completion after heal (3A)");
      cfg.connect_all();
      cfg.connect_all_clerks();
      sleep_election_timeouts(1);

      // Poll up to 100 times, 60 ms apart, for both minority operations.
      let mut polls_left = 100;
      while polls_left > 0 {
          sleep_millis(60);
          if counter.load(Ordering::SeqCst) == 3 {
              break;
          }
          polls_left -= 1;
      }

      assert_eq!(counter.load(Ordering::SeqCst), 3);
      assert_eq!(clerk.get(KEY), Some("15".to_owned()));

      Ok(())
  }
  /// Generic scenario with one client while the network is repeatedly
  /// re-partitioned.
  #[test]
  fn many_partitions_one_client() {
      let params = GenericTestParams {
          clients: 1,
          partition: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with five clients while the network is repeatedly
  /// re-partitioned.
  #[test]
  fn many_partitions_many_client() {
      let params = GenericTestParams {
          clients: 5,
          partition: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with one client and a crash/restart of all servers in
  /// every round.
  #[test]
  fn persist_one_client() {
      let params = GenericTestParams {
          clients: 1,
          crash: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with five clients and a crash/restart of all servers in
  /// every round.
  #[test]
  fn persist_concurrent() {
      let params = GenericTestParams {
          clients: 5,
          crash: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with five clients, the `unreliable` flag set, and a
  /// crash/restart of all servers in every round.
  #[test]
  fn persist_concurrent_unreliable() {
      let params = GenericTestParams {
          clients: 5,
          unreliable: true,
          crash: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with five clients, repeated re-partitioning, and a
  /// crash/restart of all servers in every round.
  #[test]
  fn persist_partition() {
      let params = GenericTestParams {
          clients: 5,
          partition: true,
          crash: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  /// Generic scenario with five clients and all three fault flags set:
  /// `unreliable`, `partition` and `crash`.
  #[test]
  fn persist_partition_unreliable() {
      let params = GenericTestParams {
          clients: 5,
          unreliable: true,
          partition: true,
          crash: true,
          ..GenericTestParams::default()
      };
      generic_test(params);
  }
  349. }