service_test.rs 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. #[macro_use]
  2. extern crate anyhow;
  3. extern crate kvraft;
  4. extern crate rand;
  5. #[macro_use]
  6. extern crate scopeguard;
  7. use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
  8. use std::sync::Arc;
  9. use std::thread::JoinHandle;
  10. use std::time::Duration;
  11. use anyhow::Context;
  12. use rand::{thread_rng, Rng};
  13. use kvraft::testing_utils::config::{
  14. make_config, sleep_election_timeouts, sleep_millis, Config,
  15. LONG_ELECTION_TIMEOUT_MILLIS,
  16. };
  17. use kvraft::Clerk;
  18. fn spawn_clients<T, Func>(
  19. config: Arc<Config>,
  20. clients: usize,
  21. func: Func,
  22. ) -> Vec<JoinHandle<T>>
  23. where
  24. T: 'static + Send,
  25. Func: 'static + Clone + Send + Sync + Fn(usize, Clerk) -> T,
  26. {
  27. let mut client_threads = vec![];
  28. for i in 0..clients {
  29. let clerk = config.make_clerk();
  30. let func = func.clone();
  31. client_threads.push(std::thread::spawn(move || func(i, clerk)))
  32. }
  33. eprintln!("spawning clients done.");
  34. client_threads
  35. }
  36. fn appending_client(
  37. index: usize,
  38. mut clerk: Clerk,
  39. stop: Arc<AtomicBool>,
  40. ) -> (usize, String) {
  41. eprintln!("client {} running.", index);
  42. let mut op_count = 0usize;
  43. let key = index.to_string();
  44. let mut last = String::new();
  45. let mut rng = thread_rng();
  46. clerk.put(&key, &last);
  47. while !stop.load(Ordering::Acquire) {
  48. eprintln!("client {} starting {}.", index, op_count);
  49. if rng.gen_ratio(1, 2) {
  50. let value = format!("({}, {}), ", index, op_count);
  51. last.push_str(&value);
  52. clerk.append(&key, &value);
  53. op_count += 1;
  54. } else {
  55. let value = clerk
  56. .get(&key)
  57. .expect(&format!("Key {} should exist.", index));
  58. assert_eq!(value, last);
  59. }
  60. eprintln!("client {} done {}.", index, op_count);
  61. }
  62. eprintln!("client {} done.", index);
  63. (op_count, last)
  64. }
  65. const PARTITION_MAX_DELAY_MILLIS: u64 = 200;
  66. fn run_partition(cfg: Arc<Config>, stop: Arc<AtomicBool>) {
  67. while !stop.load(Ordering::Acquire) {
  68. let mut indexes = cfg.shuffled_indexes();
  69. let len = indexes.len();
  70. cfg.partition(&(indexes.split_off(len / 2)), &indexes);
  71. let delay = thread_rng().gen_range(
  72. LONG_ELECTION_TIMEOUT_MILLIS
  73. ..LONG_ELECTION_TIMEOUT_MILLIS + PARTITION_MAX_DELAY_MILLIS,
  74. );
  75. std::thread::sleep(Duration::from_millis(delay));
  76. }
  77. }
  78. #[derive(Default)]
  79. struct GenericTestParams {
  80. clients: usize,
  81. unreliable: bool,
  82. partition: bool,
  83. crash: bool,
  84. maxraftstate: Option<usize>,
  85. }
  86. fn generic_test(test_params: GenericTestParams) {
  87. let GenericTestParams {
  88. clients,
  89. unreliable,
  90. partition,
  91. crash,
  92. maxraftstate,
  93. } = test_params;
  94. let maxraftstate = maxraftstate.unwrap_or(usize::MAX);
  95. const SERVERS: usize = 5;
  96. let cfg = Arc::new(make_config(SERVERS, unreliable, maxraftstate));
  97. defer!(cfg.clean_up());
  98. cfg.begin("");
  99. let mut clerk = cfg.make_clerk();
  100. const ROUNDS: usize = 3;
  101. for _ in 0..ROUNDS {
  102. // Network partition thread.
  103. let partition_stop = Arc::new(AtomicBool::new(false));
  104. // KV server clients.
  105. let clients_stop = Arc::new(AtomicBool::new(false));
  106. let config = cfg.clone();
  107. let clients_stop_clone = clients_stop.clone();
  108. let spawn_client_results = std::thread::spawn(move || {
  109. spawn_clients(config, clients, move |index: usize, clerk: Clerk| {
  110. appending_client(index, clerk, clients_stop_clone.clone())
  111. })
  112. });
  113. let partition_result = if partition {
  114. let config = cfg.clone();
  115. let partition_stop_clone = partition_stop.clone();
  116. Some(std::thread::spawn(|| {
  117. run_partition(config, partition_stop_clone)
  118. }))
  119. } else {
  120. None
  121. };
  122. if crash {
  123. cfg.crash_all();
  124. sleep_election_timeouts(1);
  125. cfg.restart_all();
  126. }
  127. std::thread::sleep(Duration::from_secs(5));
  128. // Stop partitions.
  129. partition_stop.store(true, Ordering::Release);
  130. partition_result.map(|result| {
  131. result.join().expect("Partition thread should never fail");
  132. cfg.connect_all();
  133. sleep_election_timeouts(1);
  134. });
  135. // Tell all clients to stop.
  136. clients_stop.store(true, Ordering::Release);
  137. let client_results = spawn_client_results
  138. .join()
  139. .expect("Spawning clients should never fail.");
  140. for (index, client_result) in client_results.into_iter().enumerate() {
  141. let (op_count, last_result) =
  142. client_result.join().expect("Client should never fail.");
  143. let real_result = clerk
  144. .get(index.to_string())
  145. .expect(&format!("Key {} should exist.", index));
  146. assert_eq!(real_result, last_result);
  147. assert!(
  148. op_count > 10,
  149. "Client committed only {} operations",
  150. op_count
  151. );
  152. }
  153. }
  154. cfg.end();
  155. }
  156. fn check_concurrent_results(
  157. value: String,
  158. clients: usize,
  159. expected: Vec<usize>,
  160. ) -> anyhow::Result<()> {
  161. if !value.starts_with('(') || !value.ends_with(')') {
  162. bail!("Malformed value string {}", value)
  163. }
  164. let inner_value = &value[1..value.len() - 1];
  165. let mut progress = vec![0; clients];
  166. for pair_str in inner_value.split(")(") {
  167. let mut nums = vec![];
  168. for num_str in pair_str.split(", ") {
  169. let num: usize = num_str.parse().context(format!(
  170. "Parsing '{:?}' failed within '{:?}'",
  171. num_str, value,
  172. ))?;
  173. nums.push(num);
  174. }
  175. if nums.len() != 2 {
  176. bail!(
  177. concat!(
  178. "More than two numbers in the same group when",
  179. " parsing '{:?}' failed within '{:?}'",
  180. ),
  181. pair_str,
  182. value,
  183. );
  184. }
  185. let (client, curr) = (nums[0], nums[1]);
  186. if progress[client] != curr {
  187. bail!(
  188. "Client {} failed, expecting {}, got {}, others are {:?} in {}",
  189. client,
  190. progress[client],
  191. curr,
  192. progress,
  193. value,
  194. )
  195. }
  196. progress[client] = curr + 1;
  197. }
  198. assert_eq!(progress, expected, "Expecting progress in {}", value);
  199. Ok(())
  200. }
  201. #[test]
  202. fn basic_service() {
  203. generic_test(GenericTestParams {
  204. clients: 1,
  205. ..Default::default()
  206. });
  207. }
  208. #[test]
  209. fn concurrent_client() {
  210. generic_test(GenericTestParams {
  211. clients: 5,
  212. ..Default::default()
  213. });
  214. }
  215. #[test]
  216. fn unreliable_many_clients() {
  217. generic_test(GenericTestParams {
  218. clients: 5,
  219. unreliable: true,
  220. ..Default::default()
  221. });
  222. }
  223. #[test]
  224. fn unreliable_one_key_many_clients() -> anyhow::Result<()> {
  225. const SERVERS: usize = 5;
  226. let cfg = Arc::new(make_config(SERVERS, true, 0));
  227. defer!(cfg.clean_up());
  228. let mut clerk = cfg.make_clerk();
  229. cfg.begin("Test: concurrent append to same key, unreliable (3A)");
  230. clerk.put("k", "");
  231. const CLIENTS: usize = 5;
  232. const ATTEMPTS: usize = 10;
  233. let client_results =
  234. spawn_clients(cfg.clone(), CLIENTS, |index, mut clerk| {
  235. for i in 0..ATTEMPTS {
  236. clerk.append("k", format!("({}, {})", index, i));
  237. }
  238. });
  239. for client_result in client_results {
  240. client_result.join().expect("Client should never fail");
  241. }
  242. let value = clerk.get("k").expect("Key should exist");
  243. check_concurrent_results(value, CLIENTS, vec![ATTEMPTS; CLIENTS])
  244. }
  245. #[test]
  246. fn one_partition() -> anyhow::Result<()> {
  247. const SERVERS: usize = 5;
  248. let cfg = Arc::new(make_config(SERVERS, false, 0));
  249. defer!(cfg.clean_up());
  250. cfg.begin("Test: progress in majority (3A)");
  251. const KEY: &str = "1";
  252. let mut clerk = cfg.make_clerk();
  253. clerk.put(KEY, "13");
  254. let (majority, minority) = cfg.make_partition();
  255. assert!(minority.len() < majority.len());
  256. assert_eq!(minority.len() + majority.len(), SERVERS);
  257. cfg.partition(&majority, &minority);
  258. let mut clerk_majority = cfg.make_limited_clerk(&majority);
  259. let mut clerk_minority1 = cfg.make_limited_clerk(&minority);
  260. let mut clerk_minority2 = cfg.make_limited_clerk(&minority);
  261. clerk_majority.put(KEY, "14");
  262. assert_eq!(clerk_majority.get(KEY), Some("14".to_owned()));
  263. cfg.begin("Test: no progress in minority (3A)");
  264. let counter = Arc::new(AtomicUsize::new(0));
  265. let counter1 = counter.clone();
  266. std::thread::spawn(move || {
  267. clerk_minority1.put(KEY, "15");
  268. counter1.fetch_or(1, Ordering::SeqCst);
  269. });
  270. let counter2 = counter.clone();
  271. std::thread::spawn(move || {
  272. clerk_minority2.get(KEY);
  273. counter2.fetch_or(2, Ordering::SeqCst);
  274. });
  275. sleep_millis(1000);
  276. assert_eq!(counter.load(Ordering::SeqCst), 0);
  277. assert_eq!(clerk_majority.get(KEY), Some("14".to_owned()));
  278. clerk_majority.put(KEY, "16");
  279. assert_eq!(clerk_majority.get(KEY), Some("16".to_owned()));
  280. cfg.begin("Test: completion after heal (3A)");
  281. cfg.connect_all();
  282. cfg.connect_all_clerks();
  283. sleep_election_timeouts(1);
  284. for _ in 0..100 {
  285. sleep_millis(60);
  286. if counter.load(Ordering::SeqCst) == 3 {
  287. break;
  288. }
  289. }
  290. assert_eq!(counter.load(Ordering::SeqCst), 3);
  291. assert_eq!(clerk.get(KEY), Some("15".to_owned()));
  292. Ok(())
  293. }
  294. #[test]
  295. fn many_partitions_one_client() {
  296. generic_test(GenericTestParams {
  297. clients: 1,
  298. partition: true,
  299. ..Default::default()
  300. });
  301. }
  302. #[test]
  303. fn many_partitions_many_client() {
  304. generic_test(GenericTestParams {
  305. clients: 5,
  306. partition: true,
  307. ..Default::default()
  308. });
  309. }
  310. #[test]
  311. fn persist_one_client() {
  312. generic_test(GenericTestParams {
  313. clients: 1,
  314. crash: true,
  315. ..Default::default()
  316. });
  317. }