service_test.rs 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. #[macro_use]
  2. extern crate anyhow;
  3. extern crate kvraft;
  4. extern crate rand;
  5. #[macro_use]
  6. extern crate scopeguard;
  7. use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
  8. use std::sync::Arc;
  9. use std::thread::JoinHandle;
  10. use std::time::Duration;
  11. use anyhow::Context;
  12. use rand::{thread_rng, Rng};
  13. use kvraft::testing_utils::config::{
  14. make_config, sleep_election_timeouts, sleep_millis, Config,
  15. LONG_ELECTION_TIMEOUT_MILLIS,
  16. };
  17. use kvraft::Clerk;
  18. fn spawn_clients<T, Func>(
  19. config: Arc<Config>,
  20. clients: usize,
  21. func: Func,
  22. ) -> Vec<JoinHandle<T>>
  23. where
  24. T: 'static + Send,
  25. Func: 'static + Clone + Send + Sync + Fn(usize, Clerk) -> T,
  26. {
  27. let mut client_threads = vec![];
  28. for i in 0..clients {
  29. let clerk = config.make_clerk();
  30. let func = func.clone();
  31. client_threads.push(std::thread::spawn(move || func(i, clerk)))
  32. }
  33. eprintln!("spawning clients done.");
  34. client_threads
  35. }
  36. fn appending_client(
  37. index: usize,
  38. mut clerk: Clerk,
  39. stop: Arc<AtomicBool>,
  40. ) -> (usize, String) {
  41. eprintln!("client {} running.", index);
  42. let mut op_count = 0usize;
  43. let key = index.to_string();
  44. let mut last = String::new();
  45. let mut rng = thread_rng();
  46. clerk.put(&key, &last);
  47. while !stop.load(Ordering::Acquire) {
  48. eprintln!("client {} starting {}.", index, op_count);
  49. if rng.gen_ratio(1, 2) {
  50. let value = format!("({}, {}), ", index, op_count);
  51. last.push_str(&value);
  52. clerk.append(&key, &value);
  53. op_count += 1;
  54. } else {
  55. let value = clerk
  56. .get(&key)
  57. .expect(&format!("Key {} should exist.", index));
  58. assert_eq!(value, last);
  59. }
  60. eprintln!("client {} done {}.", index, op_count);
  61. }
  62. eprintln!("client {} done.", index);
  63. (op_count, last)
  64. }
  65. const PARTITION_MAX_DELAY_MILLIS: u64 = 200;
  66. fn run_partition(cfg: Arc<Config>, stop: Arc<AtomicBool>) {
  67. while !stop.load(Ordering::Acquire) {
  68. let mut indexes = cfg.shuffled_indexes();
  69. let len = indexes.len();
  70. cfg.partition(&(indexes.split_off(len / 2)), &indexes);
  71. let delay = thread_rng().gen_range(
  72. LONG_ELECTION_TIMEOUT_MILLIS
  73. ..LONG_ELECTION_TIMEOUT_MILLIS + PARTITION_MAX_DELAY_MILLIS,
  74. );
  75. std::thread::sleep(Duration::from_millis(delay));
  76. }
  77. }
  78. #[derive(Default)]
  79. struct GenericTestParams {
  80. clients: usize,
  81. unreliable: bool,
  82. partition: bool,
  83. maxraftstate: Option<usize>,
  84. }
  85. fn generic_test(test_params: GenericTestParams) {
  86. let GenericTestParams {
  87. clients,
  88. unreliable,
  89. partition,
  90. maxraftstate,
  91. } = test_params;
  92. let maxraftstate = maxraftstate.unwrap_or(usize::MAX);
  93. const SERVERS: usize = 5;
  94. let cfg = Arc::new(make_config(SERVERS, unreliable, maxraftstate));
  95. defer!(cfg.clean_up());
  96. cfg.begin("");
  97. let mut clerk = cfg.make_clerk();
  98. const ROUNDS: usize = 3;
  99. for _ in 0..ROUNDS {
  100. // Network partition thread.
  101. let partition_stop = Arc::new(AtomicBool::new(false));
  102. // KV server clients.
  103. let clients_stop = Arc::new(AtomicBool::new(false));
  104. let config = cfg.clone();
  105. let clients_stop_clone = clients_stop.clone();
  106. let spawn_client_results = std::thread::spawn(move || {
  107. spawn_clients(config, clients, move |index: usize, clerk: Clerk| {
  108. appending_client(index, clerk, clients_stop_clone.clone())
  109. })
  110. });
  111. let partition_result = if partition {
  112. let config = cfg.clone();
  113. let partition_stop_clone = partition_stop.clone();
  114. Some(std::thread::spawn(|| {
  115. run_partition(config, partition_stop_clone)
  116. }))
  117. } else {
  118. None
  119. };
  120. std::thread::sleep(Duration::from_secs(5));
  121. // Stop partitions.
  122. partition_stop.store(true, Ordering::Release);
  123. partition_result.map(|result| {
  124. result.join().expect("Partition thread should never fail");
  125. cfg.connect_all();
  126. sleep_election_timeouts(1);
  127. });
  128. // Tell all clients to stop.
  129. clients_stop.store(true, Ordering::Release);
  130. let client_results = spawn_client_results
  131. .join()
  132. .expect("Spawning clients should never fail.");
  133. for (index, client_result) in client_results.into_iter().enumerate() {
  134. let (op_count, last_result) =
  135. client_result.join().expect("Client should never fail.");
  136. let real_result = clerk
  137. .get(index.to_string())
  138. .expect(&format!("Key {} should exist.", index));
  139. assert_eq!(real_result, last_result);
  140. assert!(
  141. op_count > 10,
  142. "Client committed only {} operations",
  143. op_count
  144. );
  145. }
  146. }
  147. cfg.end();
  148. }
  149. fn check_concurrent_results(
  150. value: String,
  151. clients: usize,
  152. expected: Vec<usize>,
  153. ) -> anyhow::Result<()> {
  154. if !value.starts_with('(') || !value.ends_with(')') {
  155. bail!("Malformed value string {}", value)
  156. }
  157. let inner_value = &value[1..value.len() - 1];
  158. let mut progress = vec![0; clients];
  159. for pair_str in inner_value.split(")(") {
  160. let mut nums = vec![];
  161. for num_str in pair_str.split(", ") {
  162. let num: usize = num_str.parse().context(format!(
  163. "Parsing '{:?}' failed within '{:?}'",
  164. num_str, value,
  165. ))?;
  166. nums.push(num);
  167. }
  168. if nums.len() != 2 {
  169. bail!(
  170. concat!(
  171. "More than two numbers in the same group when",
  172. " parsing '{:?}' failed within '{:?}'",
  173. ),
  174. pair_str,
  175. value,
  176. );
  177. }
  178. let (client, curr) = (nums[0], nums[1]);
  179. if progress[client] != curr {
  180. bail!(
  181. "Client {} failed, expecting {}, got {}, others are {:?} in {}",
  182. client,
  183. progress[client],
  184. curr,
  185. progress,
  186. value,
  187. )
  188. }
  189. progress[client] = curr + 1;
  190. }
  191. assert_eq!(progress, expected, "Expecting progress in {}", value);
  192. Ok(())
  193. }
  194. #[test]
  195. fn basic_service() {
  196. generic_test(GenericTestParams {
  197. clients: 1,
  198. ..Default::default()
  199. });
  200. }
  201. #[test]
  202. fn concurrent_client() {
  203. generic_test(GenericTestParams {
  204. clients: 5,
  205. ..Default::default()
  206. });
  207. }
  208. #[test]
  209. fn unreliable_many_clients() {
  210. generic_test(GenericTestParams {
  211. clients: 5,
  212. unreliable: true,
  213. ..Default::default()
  214. });
  215. }
  216. #[test]
  217. fn unreliable_one_key_many_clients() -> anyhow::Result<()> {
  218. const SERVERS: usize = 5;
  219. let cfg = Arc::new(make_config(SERVERS, true, 0));
  220. defer!(cfg.clean_up());
  221. let mut clerk = cfg.make_clerk();
  222. cfg.begin("Test: concurrent append to same key, unreliable (3A)");
  223. clerk.put("k", "");
  224. const CLIENTS: usize = 5;
  225. const ATTEMPTS: usize = 10;
  226. let client_results = spawn_clients(cfg, CLIENTS, |index, mut clerk| {
  227. for i in 0..ATTEMPTS {
  228. clerk.append("k", format!("({}, {})", index, i));
  229. }
  230. });
  231. for client_result in client_results {
  232. client_result.join().expect("Client should never fail");
  233. }
  234. let value = clerk.get("k").expect("Key should exist");
  235. check_concurrent_results(value, CLIENTS, vec![ATTEMPTS; CLIENTS])
  236. }
  237. #[test]
  238. fn one_partition() -> anyhow::Result<()> {
  239. const SERVERS: usize = 5;
  240. let cfg = Arc::new(make_config(SERVERS, false, 0));
  241. defer!(cfg.clean_up());
  242. cfg.begin("Test: progress in majority (3A)");
  243. const KEY: &str = "1";
  244. let mut clerk = cfg.make_clerk();
  245. clerk.put(KEY, "13");
  246. let (majority, minority) = cfg.make_partition();
  247. assert!(minority.len() < majority.len());
  248. assert_eq!(minority.len() + majority.len(), SERVERS);
  249. cfg.partition(&majority, &minority);
  250. let mut clerk_majority = cfg.make_limited_clerk(&majority);
  251. let mut clerk_minority1 = cfg.make_limited_clerk(&minority);
  252. let mut clerk_minority2 = cfg.make_limited_clerk(&minority);
  253. clerk_majority.put(KEY, "14");
  254. assert_eq!(clerk_majority.get(KEY), Some("14".to_owned()));
  255. cfg.begin("Test: no progress in minority (3A)");
  256. let counter = Arc::new(AtomicUsize::new(0));
  257. let counter1 = counter.clone();
  258. std::thread::spawn(move || {
  259. clerk_minority1.put(KEY, "15");
  260. counter1.fetch_or(1, Ordering::SeqCst);
  261. });
  262. let counter2 = counter.clone();
  263. std::thread::spawn(move || {
  264. clerk_minority2.get(KEY);
  265. counter2.fetch_or(2, Ordering::SeqCst);
  266. });
  267. sleep_millis(1000);
  268. assert_eq!(counter.load(Ordering::SeqCst), 0);
  269. assert_eq!(clerk_majority.get(KEY), Some("14".to_owned()));
  270. clerk_majority.put(KEY, "16");
  271. assert_eq!(clerk_majority.get(KEY), Some("16".to_owned()));
  272. cfg.begin("Test: completion after heal (3A)");
  273. cfg.connect_all();
  274. cfg.connect_all_clerks();
  275. sleep_election_timeouts(1);
  276. for _ in 0..100 {
  277. sleep_millis(60);
  278. if counter.load(Ordering::SeqCst) == 3 {
  279. break;
  280. }
  281. }
  282. assert_eq!(counter.load(Ordering::SeqCst), 3);
  283. assert_eq!(clerk.get(KEY), Some("15".to_owned()));
  284. Ok(())
  285. }
  286. #[test]
  287. fn many_partitions_one_client() {
  288. generic_test(GenericTestParams {
  289. clients: 1,
  290. partition: true,
  291. ..Default::default()
  292. });
  293. }
  294. #[test]
  295. fn many_partitions_many_client() {
  296. generic_test(GenericTestParams {
  297. clients: 5,
  298. partition: true,
  299. ..Default::default()
  300. });
  301. }