From c83cc07b11a9e8a55b69be717fc9cc26ffd3509b Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sat, 15 Sep 2018 10:45:54 -0400 Subject: [PATCH 01/41] Create 3 replicas per reader node in migration --- src/controller/migrate/mod.rs | 63 ++++++++++++++++++++++------------- src/integration.rs | 6 ++-- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index dbaed03ec..0338a9c07 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -44,6 +44,9 @@ pub(super) enum ColumnChange { Drop(usize), } +/// The number of replicas per reader node, including the original reader node. +pub const NUM_READER_REPLICAS: usize = 3; + /// A `Migration` encapsulates a number of changes to the Soup data flow graph. /// /// Only one `Migration` can be in effect at any point in time. No changes are made to the running @@ -52,7 +55,7 @@ pub struct Migration<'a> { pub(super) mainline: &'a mut ControllerInner, pub(super) added: Vec, pub(super) columns: Vec<(NodeIndex, ColumnChange)>, - pub(super) readers: HashMap, + pub(super) readers: HashMap>, pub(super) start: Instant, pub(super) log: slog::Logger, @@ -210,16 +213,24 @@ impl<'a> Migration<'a> { fn ensure_reader_for(&mut self, n: NodeIndex, name: Option) { if !self.readers.contains_key(&n) { - // make a reader - let r = node::special::Reader::new(n); - let r = if let Some(name) = name { - self.mainline.ingredients[n].named_mirror(r, name) - } else { - self.mainline.ingredients[n].mirror(r) - }; - let r = self.mainline.ingredients.add_node(r); - self.mainline.ingredients.add_edge(n, r, ()); - self.readers.insert(n, r); + // make a reader and its replicas + let mut readers = vec![]; + for i in 0..NUM_READER_REPLICAS { + let r = node::special::Reader::new(n); + let r = if let Some(name) = name.clone() { + if i == 0 { + self.mainline.ingredients[n].named_mirror(r, name) + } else { + self.mainline.ingredients[n].named_mirror(r, format!("{}_r{}", name, i)) + } + } else { + self.mainline.ingredients[n].mirror(r) + }; + let r = self.mainline.ingredients.add_node(r); + self.mainline.ingredients.add_edge(n, r, ()); + readers.push(r); + } + self.readers.insert(n, readers); } } @@ -227,15 +238,17 @@ impl<'a> Migration<'a> { /// /// To query into the maintained state, use `ControllerInner::get_getter`. #[cfg(test)] - pub fn maintain_anonymous(&mut self, n: NodeIndex, key: &[usize]) -> NodeIndex { + pub fn maintain_anonymous(&mut self, n: NodeIndex, key: &[usize]) -> Vec { self.ensure_reader_for(n, None); - let ri = self.readers[&n]; + let ris = &self.readers[&n]; - self.mainline.ingredients[ri] - .with_reader_mut(|r| r.set_key(key)) - .unwrap(); + for ri in ris { + self.mainline.ingredients[*ri] + .with_reader_mut(|r| r.set_key(key)) + .unwrap(); + } - ri + ris.to_vec() } /// Set up the given node such that its output can be efficiently queried. @@ -244,11 +257,13 @@ impl<'a> Migration<'a> { pub fn maintain(&mut self, name: String, n: NodeIndex, key: &[usize]) { self.ensure_reader_for(n, Some(name)); - let ri = self.readers[&n]; + let ris = &self.readers[&n]; - self.mainline.ingredients[ri] - .with_reader_mut(|r| r.set_key(key)) - .unwrap(); + for ri in ris { + self.mainline.ingredients[*ri] + .with_reader_mut(|r| r.set_key(key)) + .unwrap(); + } } /// Commit the changes introduced by this `Migration` to the master `Soup`. @@ -265,8 +280,10 @@ impl<'a> Migration<'a> { let mut new: HashSet<_> = self.added.into_iter().collect(); // Readers are nodes too. - for (_parent, reader) in self.readers { - new.insert(reader); + for (_parent, readers) in self.readers { + for reader in readers { + new.insert(reader); + } } // Shard the graph as desired diff --git a/src/integration.rs b/src/integration.rs index 8a0ee6fd9..2906aa65d 100644 --- a/src/integration.rs +++ b/src/integration.rs @@ -2188,7 +2188,7 @@ fn node_removal() { 1, )); let mut g = b.build_local().unwrap(); - let cid = g.migrate(|mig| { + let cids = g.migrate(|mig| { let a = mig.add_base("a", &["a", "b"], Base::new(vec![]).with_key(vec![0])); let b = mig.add_base("b", &["a", "b"], Base::new(vec![]).with_key(vec![0])); @@ -2220,7 +2220,9 @@ fn node_removal() { vec![vec![1.into(), 2.into()]] ); - g.remove_node(cid).unwrap(); + for cid in cids { + g.remove_node(cid).unwrap(); + } // update value again mutb.insert(vec![id.clone(), 4.into()]).unwrap(); From 3716541480e8db923926ea94c862051ccaf843f3 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sat, 15 Sep 2018 10:57:04 -0400 Subject: [PATCH 02/41] Remove reader node replicas in apply recipe --- src/controller/inner.rs | 36 +++++++++++++++++++++++------------- src/integration.rs | 5 +++-- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/controller/inner.rs b/src/controller/inner.rs index cc6bd4310..7862593f6 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -13,6 +13,7 @@ use std::{io, time}; use api::builders::*; use api::ActivationResult; use crate::controller::migrate::materialization::Materializations; +use crate::controller::migrate::NUM_READER_REPLICAS; use crate::controller::{ControllerState, DomainHandle, Migration, Recipe, WorkerIdentifier}; use crate::coordination::CoordinationMessage; @@ -898,7 +899,7 @@ impl ControllerInner { graphviz(&self.ingredients, &self.materializations) } - fn remove_leaf(&mut self, mut leaf: NodeIndex) -> Result<(), String> { + fn remove_leaf(&mut self, leaf: NodeIndex) -> Result<(), String> { let mut removals = vec![]; let start = leaf; assert!(!self.ingredients[leaf].is_source()); @@ -909,6 +910,7 @@ impl ControllerInner { leaf.index() ); + let mut nodes = vec![]; if self .ingredients .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) @@ -938,30 +940,38 @@ impl ControllerInner { ); unreachable!(); } - // nodes can have only one reader attached - assert!(readers.len() <= 1); + // nodes can only have as many readers as the number of reader replicas + assert!(readers.len() <= NUM_READER_REPLICAS); debug!( self.log, "Removing query leaf \"{}\"", self.ingredients[leaf].name(); "node" => leaf.index(), ); if !readers.is_empty() { - removals.push(readers[0]); - leaf = readers[0]; + for reader in readers { + removals.push(reader); + nodes.push(reader); + } } else { unreachable!(); } } - // `node` now does not have any children any more - assert_eq!( - self.ingredients - .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) - .count(), - 0 - ); + // If the start node didn't have any children, it can be removed immediately + if nodes.is_empty() { + nodes.push(leaf); + } + + // The nodes we remove first do not have children any more + for node in &nodes { + assert_eq!( + self.ingredients + .neighbors_directed(*node, petgraph::EdgeDirection::Outgoing) + .count(), + 0 + ); + } - let mut nodes = vec![leaf]; while let Some(node) = nodes.pop() { let mut parents = self .ingredients diff --git a/src/integration.rs b/src/integration.rs index 2906aa65d..38514b19b 100644 --- a/src/integration.rs +++ b/src/integration.rs @@ -1,5 +1,6 @@ use basics::DataType; use consensus::LocalAuthority; +use crate::controller::migrate::NUM_READER_REPLICAS; use crate::controller::recipe::Recipe; use crate::controller::sql::SqlIncorporator; use crate::controller::{ControllerBuilder, LocalControllerHandle}; @@ -2260,7 +2261,7 @@ fn remove_query() { let mut g = ControllerBuilder::default().build_local().unwrap(); g.install_recipe(r_txt).unwrap(); assert_eq!(g.inputs().unwrap().len(), 1); - assert_eq!(g.outputs().unwrap().len(), 2); + assert_eq!(g.outputs().unwrap().len(), 2 * NUM_READER_REPLICAS); let mut mutb = g.table("b").unwrap(); let mut qa = g.view("qa").unwrap(); @@ -2277,7 +2278,7 @@ fn remove_query() { // Remove qb and check that the graph still functions as expected. g.install_recipe(r2_txt).unwrap(); assert_eq!(g.inputs().unwrap().len(), 1); - assert_eq!(g.outputs().unwrap().len(), 1); + assert_eq!(g.outputs().unwrap().len(), 1 * NUM_READER_REPLICAS); assert!(g.view("qb").is_err()); mutb.insert(vec![42.into(), "6".into(), "7".into()]) From f421f9c637ddd959fad1f535b41921b80f4608fb Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sat, 15 Sep 2018 11:04:26 -0400 Subject: [PATCH 03/41] Hardcode number of expected nodes to pass tests --- src/controller/sql/mod.rs | 24 +++++++++++++----------- src/integration.rs | 4 ++-- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/controller/sql/mod.rs b/src/controller/sql/mod.rs index b6a9b711a..f10b84058 100644 --- a/src/controller/sql/mod.rs +++ b/src/controller/sql/mod.rs @@ -933,6 +933,8 @@ impl<'a> ToFlowParts for &'a str { #[cfg(test)] mod tests { use super::{SqlIncorporator, ToFlowParts}; + // TODO(ygina): don't hardcode number of replicas + use crate::controller::migrate::NUM_READER_REPLICAS; use crate::controller::Migration; use crate::integration; use dataflow::prelude::*; @@ -993,7 +995,7 @@ mod tests { ); // Should now have source, "users", a leaf projection node for the new selection, and // a reader node - assert_eq!(mig.graph().node_count(), ncount + 2); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); // Invalid query should fail parsing and add no nodes assert!( @@ -1002,7 +1004,7 @@ mod tests { .is_err() ); // Should still only have source, "users" and the two nodes for the above selection - assert_eq!(mig.graph().node_count(), ncount + 2); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); }); } @@ -1128,7 +1130,7 @@ mod tests { ); assert!(res.is_ok()); // added the aggregation and the edge view, and a reader - assert_eq!(mig.graph().node_count(), 5); + assert_eq!(mig.graph().node_count(), NUM_READER_REPLICAS + 4); // check aggregation view let f = Box::new(FunctionExpression::Count( Column::from("votes.userid"), @@ -1177,7 +1179,7 @@ mod tests { let qfp = res.unwrap(); assert_eq!(qfp.new_nodes.len(), 2); // expect three new nodes: filter, project, reader - assert_eq!(mig.graph().node_count(), ncount + 3); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 2); // should have ended up with a different leaf node assert_ne!(qfp.query_leaf, leaf); }); @@ -1222,7 +1224,7 @@ mod tests { assert!(res.is_ok()); // should have added two more nodes (project and reader) let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + 2); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); // should NOT have ended up with the same leaf node assert_ne!(qfp.query_leaf, leaf); }); @@ -1267,7 +1269,7 @@ mod tests { assert!(res.is_ok()); // should have added two more nodes: one identity node and one reader node let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + 2); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); // only the identity node is returned in the vector of new nodes assert_eq!(qfp.new_nodes.len(), 1); assert_eq!(get_node(&inc, mig, &qfp.name).description(), "≡"); @@ -1286,7 +1288,7 @@ mod tests { assert!(res.is_ok()); // should have added two more nodes: one projection node and one reader node let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + 2); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); // only the projection node is returned in the vector of new nodes assert_eq!(qfp.new_nodes.len(), 1); assert_eq!(get_node(&inc, mig, &qfp.name).description(), "π[0, 1, 2]"); @@ -1341,7 +1343,7 @@ mod tests { assert!(res.is_ok()); // should have added three more nodes: a join, a projection, and a reader let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + 3); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 2); // only the join and projection nodes are returned in the vector of new nodes assert_eq!(qfp.new_nodes.len(), 2); }); @@ -1367,7 +1369,7 @@ mod tests { let res = inc.add_query("SELECT COUNT(votes.userid) AS count FROM votes;", None, mig); assert!(res.is_ok()); // added the aggregation, a project helper, the edge view, and reader - assert_eq!(mig.graph().node_count(), 6); + assert_eq!(mig.graph().node_count(), NUM_READER_REPLICAS + 5); // check project helper node let f = Box::new(FunctionExpression::Count( Column::from("votes.userid"), @@ -1423,7 +1425,7 @@ mod tests { ); assert!(res.is_ok()); // added the aggregation, a project helper, the edge view, and reader - assert_eq!(mig.graph().node_count(), 5); + assert_eq!(mig.graph().node_count(), NUM_READER_REPLICAS + 4); // check aggregation view let f = Box::new(FunctionExpression::Count(Column::from("votes.aid"), false)); let qid = query_id_hash( @@ -1788,7 +1790,7 @@ mod tests { // should NOT have ended up with the same leaf node assert_ne!(qfp.query_leaf, leaf); // should have added three more nodes (filter, project and reader) - assert_eq!(mig.graph().node_count(), ncount + 3); + assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 2); }); } diff --git a/src/integration.rs b/src/integration.rs index 38514b19b..8964f299c 100644 --- a/src/integration.rs +++ b/src/integration.rs @@ -2044,7 +2044,7 @@ fn recipe_activates_and_migrates() { // still one base node assert_eq!(g.inputs().unwrap().len(), 1); // two leaf nodes - assert_eq!(g.outputs().unwrap().len(), 2); + assert_eq!(g.outputs().unwrap().len(), 2 * NUM_READER_REPLICAS); } #[test] @@ -2064,7 +2064,7 @@ fn recipe_activates_and_migrates_with_join() { // still two base nodes assert_eq!(g.inputs().unwrap().len(), 2); // one leaf node - assert_eq!(g.outputs().unwrap().len(), 1); + assert_eq!(g.outputs().unwrap().len(), 1 * NUM_READER_REPLICAS); } fn test_queries(test: &str, file: &'static str, shard: bool, reuse: bool, log: bool) { From fcfa832c723edebdd42c4daec72f12a7b7b6bcde Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sat, 15 Sep 2018 13:52:52 -0400 Subject: [PATCH 04/41] Consider all replicas when returning a view --- dataflow/src/node/special/reader.rs | 11 ++++++++++- src/controller/inner.rs | 15 +++++++++++---- src/controller/migrate/mod.rs | 2 +- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/dataflow/src/node/special/reader.rs b/dataflow/src/node/special/reader.rs index b5a8261b9..5fdfc7391 100644 --- a/dataflow/src/node/special/reader.rs +++ b/dataflow/src/node/special/reader.rs @@ -35,6 +35,8 @@ pub struct Reader { streamers: Vec>>, for_node: NodeIndex, + // TODO(ygina): derive number of replicas directly from graph structure + replicas: usize, state: Option>, } @@ -46,17 +48,19 @@ impl Clone for Reader { streamers: self.streamers.clone(), state: self.state.clone(), for_node: self.for_node, + replicas: self.replicas, } } } impl Reader { - pub fn new(for_node: NodeIndex) -> Self { + pub fn new(for_node: NodeIndex, replicas: usize) -> Self { Reader { writer: None, streamers: Vec::new(), state: None, for_node, + replicas, } } @@ -66,6 +70,10 @@ impl Reader { self.for_node } + pub fn replicas(&self) -> usize { + self.replicas + } + #[allow(dead_code)] pub(crate) fn writer(&self) -> Option<&backlog::WriteHandle> { self.writer.as_ref() @@ -82,6 +90,7 @@ impl Reader { streamers: mem::replace(&mut self.streamers, Vec::new()), state: self.state.clone(), for_node: self.for_node, + replicas: self.replicas, } } diff --git a/src/controller/inner.rs b/src/controller/inner.rs index 7862593f6..026a70144 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -525,18 +525,25 @@ impl ControllerInner { // "for", and we simply search for the appropriate reader by that metric. since we know // that the reader must be relatively close, a BFS search is the way to go. let mut bfs = Bfs::new(&self.ingredients, node); - let mut reader = None; + let mut readers = vec![]; while let Some(child) = bfs.next(&self.ingredients) { if self.ingredients[child] .with_reader(|r| r.is_for() == node) .unwrap_or(false) { - reader = Some(child); - break; + readers.push(child); + + // Exit the loop if we've found all the replicas. + let replicas = self.ingredients[child].with_reader(|r| r.replicas()).unwrap(); + if readers.len() == replicas { + break; + } } } - reader + // TODO(ygina): select a reader more smartly, or randomly. + // Always select the first reader in the list, if it exists. + readers.get(0).and_then(|r| Some(*r)) } /// Obtain a `ViewBuilder` that can be sent to a client and then used to query a given diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index 0338a9c07..4eb2d7b98 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -216,7 +216,7 @@ impl<'a> Migration<'a> { // make a reader and its replicas let mut readers = vec![]; for i in 0..NUM_READER_REPLICAS { - let r = node::special::Reader::new(n); + let r = node::special::Reader::new(n, NUM_READER_REPLICAS); let r = if let Some(name) = name.clone() { if i == 0 { self.mainline.ingredients[n].named_mirror(r, name) From 7cb0b2d31a369da9b35cf24b12a05641be06e217 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sat, 15 Sep 2018 14:57:11 -0400 Subject: [PATCH 05/41] Always allocate replicas in different domains --- dataflow/src/node/mod.rs | 5 +++++ dataflow/src/node/special/reader.rs | 10 +++++++++- src/controller/migrate/assignment.rs | 6 ++++++ src/controller/migrate/mod.rs | 2 +- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/dataflow/src/node/mod.rs b/dataflow/src/node/mod.rs index 36de93404..3eadd7f97 100644 --- a/dataflow/src/node/mod.rs +++ b/dataflow/src/node/mod.rs @@ -373,6 +373,11 @@ impl Node { } } + pub fn is_reader_replica(&self) -> bool { + self.with_reader(|r| r.replica_index() > 0) + .unwrap_or(false) + } + pub fn is_ingress(&self) -> bool { if let NodeType::Ingress = self.inner { true diff --git a/dataflow/src/node/special/reader.rs b/dataflow/src/node/special/reader.rs index 5fdfc7391..9f039896a 100644 --- a/dataflow/src/node/special/reader.rs +++ b/dataflow/src/node/special/reader.rs @@ -36,6 +36,7 @@ pub struct Reader { for_node: NodeIndex, // TODO(ygina): derive number of replicas directly from graph structure + replica_i: usize, replicas: usize, state: Option>, } @@ -48,18 +49,20 @@ impl Clone for Reader { streamers: self.streamers.clone(), state: self.state.clone(), for_node: self.for_node, + replica_i: self.replica_i, replicas: self.replicas, } } } impl Reader { - pub fn new(for_node: NodeIndex, replicas: usize) -> Self { + pub fn new(for_node: NodeIndex, replica_i: usize, replicas: usize) -> Self { Reader { writer: None, streamers: Vec::new(), state: None, for_node, + replica_i, replicas, } } @@ -70,6 +73,10 @@ impl Reader { self.for_node } + pub fn replica_index(&self) -> usize { + self.replica_i + } + pub fn replicas(&self) -> usize { self.replicas } @@ -90,6 +97,7 @@ impl Reader { streamers: mem::replace(&mut self.streamers, Vec::new()), state: self.state.clone(), for_node: self.for_node, + replica_i: self.replica_i, replicas: self.replicas, } } diff --git a/src/controller/migrate/assignment.rs b/src/controller/migrate/assignment.rs index 1ecf336c5..714a326a3 100644 --- a/src/controller/migrate/assignment.rs +++ b/src/controller/migrate/assignment.rs @@ -18,6 +18,7 @@ pub fn assign( // // - the child of a Sharder is always in a different domain from the sharder // - shard merge nodes are never in the same domain as their sharded ancestors + // - reader replicas are always in different domains from each other let mut topo_list = Vec::with_capacity(new.len()); let mut topo = petgraph::visit::Topo::new(&*graph); @@ -109,6 +110,11 @@ pub fn assign( }; } + if n.is_reader_replica() { + // replicas are always in their own domain to distribute the load. + return next_domain(); + } + if graph[node].name().starts_with("BOUNDARY_") { return next_domain(); } diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index 4eb2d7b98..6c6f2cfd6 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -216,7 +216,7 @@ impl<'a> Migration<'a> { // make a reader and its replicas let mut readers = vec![]; for i in 0..NUM_READER_REPLICAS { - let r = node::special::Reader::new(n, NUM_READER_REPLICAS); + let r = node::special::Reader::new(n, i, NUM_READER_REPLICAS); let r = if let Some(name) = name.clone() { if i == 0 { self.mainline.ingredients[n].named_mirror(r, name) From ec22c1c0680b27ee5b7df9dc70887644f7421c28 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sat, 15 Sep 2018 16:27:06 -0400 Subject: [PATCH 06/41] Remove replicas on different domains --- src/controller/inner.rs | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/controller/inner.rs b/src/controller/inner.rs index 026a70144..577da40a2 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -13,7 +13,6 @@ use std::{io, time}; use api::builders::*; use api::ActivationResult; use crate::controller::migrate::materialization::Materializations; -use crate::controller::migrate::NUM_READER_REPLICAS; use crate::controller::{ControllerState, DomainHandle, Migration, Recipe, WorkerIdentifier}; use crate::coordination::CoordinationMessage; @@ -918,6 +917,7 @@ impl ControllerInner { ); let mut nodes = vec![]; + let mut egress_node = None; if self .ingredients .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) @@ -934,12 +934,19 @@ impl ControllerInner { if self.ingredients[*ni].is_reader() { true } else { - has_non_reader_children = true; + if egress_node.is_some() || !self.ingredients[*ni].is_egress() { + has_non_reader_children = true; + } else { + egress_node = Some(ni.clone()); + } false } }).collect(); if has_non_reader_children { - // should never happen, since we remove nodes in reverse topological order + // The leaf node can only have non-reader children if its reader has replicas. + // Then the non-reader child is a single egress node that connects the leaf to + // replicas in other domains. Otherwise impossible, since we remove nodes in + // reverse topological order. crit!( self.log, "not removing node {} yet, as it still has non-reader children", @@ -947,8 +954,8 @@ impl ControllerInner { ); unreachable!(); } - // nodes can only have as many readers as the number of reader replicas - assert!(readers.len() <= NUM_READER_REPLICAS); + // nodes can only have one reader + assert!(readers.len() <= 1); debug!( self.log, "Removing query leaf \"{}\"", self.ingredients[leaf].name(); @@ -969,6 +976,24 @@ impl ControllerInner { nodes.push(leaf); } + // If there's an egress node, that means the reader replicas are on different domains. + // Remove all those leaf nodes as well. + match egress_node { + Some(ni) => { + let mut bfs = Bfs::new(&self.ingredients, ni); + while let Some(child) = bfs.next(&self.ingredients) { + if self.ingredients + .neighbors_directed(child, petgraph::EdgeDirection::Outgoing) + .count() == 0 + { + removals.push(child); + nodes.push(child); + } + } + }, + None => {}, + } + // The nodes we remove first do not have children any more for node in &nodes { assert_eq!( From d9dd04119feafcf531b9519630ebc3071e6057ab Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sat, 15 Sep 2018 19:11:49 -0400 Subject: [PATCH 07/41] Cache readers in node --- dataflow/src/node/mod.rs | 31 +++++++++++++++++++++++----- dataflow/src/node/special/reader.rs | 23 +++++++-------------- src/controller/inner.rs | 27 ++++++------------------ src/controller/migrate/assignment.rs | 2 +- src/controller/migrate/mod.rs | 4 +++- 5 files changed, 43 insertions(+), 44 deletions(-) diff --git a/dataflow/src/node/mod.rs b/dataflow/src/node/mod.rs index 3eadd7f97..1963c6144 100644 --- a/dataflow/src/node/mod.rs +++ b/dataflow/src/node/mod.rs @@ -28,6 +28,7 @@ pub struct Node { taken: bool, sharded_by: Sharding, + replicas: Vec, } // constructors @@ -50,6 +51,7 @@ impl Node { taken: false, sharded_by: Sharding::None, + replicas: Vec::new(), } } @@ -106,6 +108,11 @@ impl Node { self.sharded_by = s; } + /// Add a reader replica to this node. + pub fn add_replica(&mut self, ni: NodeIndex) { + self.replicas.push(ni) + } + pub fn on_commit(&mut self, remap: &HashMap) { // this is *only* overwritten for these asserts. assert!(!self.taken); @@ -331,6 +338,25 @@ impl Node { } } +// reader replication +impl Node { + pub fn has_replicas(&self) -> bool { + !self.replicas.is_empty() + } + + pub fn num_replicas(&self) -> usize { + self.replicas.len() + } + + pub fn get_replicas(&self) -> &[NodeIndex] { + &self.replicas[..] + } + + pub fn is_replica(&self) -> bool { + self.with_reader(|r| r.is_replica()).unwrap_or(false) + } +} + // is this or that? impl Node { pub fn is_source(&self) -> bool { @@ -373,11 +399,6 @@ impl Node { } } - pub fn is_reader_replica(&self) -> bool { - self.with_reader(|r| r.replica_index() > 0) - .unwrap_or(false) - } - pub fn is_ingress(&self) -> bool { if let NodeType::Ingress = self.inner { true diff --git a/dataflow/src/node/special/reader.rs b/dataflow/src/node/special/reader.rs index 9f039896a..80fc6dfec 100644 --- a/dataflow/src/node/special/reader.rs +++ b/dataflow/src/node/special/reader.rs @@ -35,9 +35,7 @@ pub struct Reader { streamers: Vec>>, for_node: NodeIndex, - // TODO(ygina): derive number of replicas directly from graph structure - replica_i: usize, - replicas: usize, + is_replica: bool, state: Option>, } @@ -49,21 +47,19 @@ impl Clone for Reader { streamers: self.streamers.clone(), state: self.state.clone(), for_node: self.for_node, - replica_i: self.replica_i, - replicas: self.replicas, + is_replica: self.is_replica, } } } impl Reader { - pub fn new(for_node: NodeIndex, replica_i: usize, replicas: usize) -> Self { + pub fn new(for_node: NodeIndex, is_replica: bool) -> Self { Reader { writer: None, streamers: Vec::new(), state: None, for_node, - replica_i, - replicas, + is_replica, } } @@ -73,12 +69,8 @@ impl Reader { self.for_node } - pub fn replica_index(&self) -> usize { - self.replica_i - } - - pub fn replicas(&self) -> usize { - self.replicas + pub fn is_replica(&self) -> bool { + self.is_replica } #[allow(dead_code)] @@ -97,8 +89,7 @@ impl Reader { streamers: mem::replace(&mut self.streamers, Vec::new()), state: self.state.clone(), for_node: self.for_node, - replica_i: self.replica_i, - replicas: self.replicas, + is_replica: self.is_replica, } } diff --git a/src/controller/inner.rs b/src/controller/inner.rs index 577da40a2..fa7a8bc17 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -518,27 +518,12 @@ impl ControllerInner { } fn find_view_for(&self, node: NodeIndex) -> Option { - // reader should be a child of the given node. however, due to sharding, it may not be an - // *immediate* child. furthermore, once we go beyond depth 1, we may accidentally hit an - // *unrelated* reader node. to account for this, readers keep track of what node they are - // "for", and we simply search for the appropriate reader by that metric. since we know - // that the reader must be relatively close, a BFS search is the way to go. - let mut bfs = Bfs::new(&self.ingredients, node); - let mut readers = vec![]; - while let Some(child) = bfs.next(&self.ingredients) { - if self.ingredients[child] - .with_reader(|r| r.is_for() == node) - .unwrap_or(false) - { - readers.push(child); - - // Exit the loop if we've found all the replicas. - let replicas = self.ingredients[child].with_reader(|r| r.replicas()).unwrap(); - if readers.len() == replicas { - break; - } - } - } + // reader should be a child of the given node. however, due to sharding and replication, + // it may not be an *immediate* child. furthermore, once we go beyond depth 1, we may + // accidentally hit an *unrelated* reader node. to account for this, nodes cache their + // readers so we can easily query data. + // TODO(ygina): Does this break sharding? + let readers = self.ingredients[node].get_replicas(); // TODO(ygina): select a reader more smartly, or randomly. // Always select the first reader in the list, if it exists. diff --git a/src/controller/migrate/assignment.rs b/src/controller/migrate/assignment.rs index 714a326a3..2d7f51ac4 100644 --- a/src/controller/migrate/assignment.rs +++ b/src/controller/migrate/assignment.rs @@ -110,7 +110,7 @@ pub fn assign( }; } - if n.is_reader_replica() { + if n.is_replica() { // replicas are always in their own domain to distribute the load. return next_domain(); } diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index 6c6f2cfd6..b0336e5c8 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -216,7 +216,8 @@ impl<'a> Migration<'a> { // make a reader and its replicas let mut readers = vec![]; for i in 0..NUM_READER_REPLICAS { - let r = node::special::Reader::new(n, i, NUM_READER_REPLICAS); + let is_replica = i > 0; + let r = node::special::Reader::new(n, is_replica); let r = if let Some(name) = name.clone() { if i == 0 { self.mainline.ingredients[n].named_mirror(r, name) @@ -228,6 +229,7 @@ impl<'a> Migration<'a> { }; let r = self.mainline.ingredients.add_node(r); self.mainline.ingredients.add_edge(n, r, ()); + self.mainline.ingredients[n].add_replica(r); readers.push(r); } self.readers.insert(n, readers); From cc73d86cb36e9e367b6febfb44be98e3cd01c9ed Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Thu, 20 Sep 2018 17:10:12 -0400 Subject: [PATCH 08/41] Ability to obtain view to any replica --- api/src/controller.rs | 40 +++++++++++++++++++--------- src/controller/inner.rs | 58 ++++++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/api/src/controller.rs b/api/src/controller.rs index 51da63b38..ef1ad9478 100644 --- a/api/src/controller.rs +++ b/api/src/controller.rs @@ -203,28 +203,42 @@ impl ControllerHandle { } /// Obtain a `View` that allows you to query the given external view. + /// The `View` is of an arbitrary replica. pub fn view(&mut self, name: &str) -> Result { + let replicas = self.view_replicas(name)?; + match replicas.get(0) { + Some(r) => Ok(r.clone()), + None => Err(format_err!("view {} does not exist", name)), + } + } + + /// Obtain a list of `View`s that allow you to query the given external view. + /// Each view corresponds to a single reader replica. + pub fn view_replicas(&mut self, name: &str) -> Result, failure::Error> { // This call attempts to detect if this function is being called in a loop. If this // is getting false positives, then it is safe to increase the allowed hit count. #[cfg(debug_assertions)] assert_infrequent::at_most(200); - self.rpc::<_, Option>("view_builder", name) - .context(format!("building View for {}", name))? - .ok_or_else(|| format_err!("view {} does not exist", name)) - .and_then(|mut g| { - if let Some(port) = self.local_port { - g = g.with_local_port(port); - } + let view_builders = self.rpc::<_, Vec>("view_builder", name) + .context(format!("building View for {}", name))?; + + let mut views = Vec::new(); + for mut g in view_builders { + if let Some(port) = self.local_port { + g = g.with_local_port(port); + } - let g = g.build(&mut self.views)?; + let g = g.build(&mut self.views)?; - if self.local_port.is_none() { - self.local_port = Some(g.local_addr().unwrap().port()); - } + if self.local_port.is_none() { + self.local_port = Some(g.local_addr().unwrap().port()); + } - Ok(g) - }) + views.push(g); + } + + Ok(views) } /// Obtain a `Table` that allows you to perform writes, deletes, and other operations on the diff --git a/src/controller/inner.rs b/src/controller/inner.rs index fa7a8bc17..4e00f5922 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -517,47 +517,51 @@ impl ControllerInner { }).collect() } - fn find_view_for(&self, node: NodeIndex) -> Option { + fn find_views_for(&self, node: NodeIndex) -> &[NodeIndex] { // reader should be a child of the given node. however, due to sharding and replication, // it may not be an *immediate* child. furthermore, once we go beyond depth 1, we may // accidentally hit an *unrelated* reader node. to account for this, nodes cache their // readers so we can easily query data. - // TODO(ygina): Does this break sharding? - let readers = self.ingredients[node].get_replicas(); - - // TODO(ygina): select a reader more smartly, or randomly. - // Always select the first reader in the list, if it exists. - readers.get(0).and_then(|r| Some(*r)) + self.ingredients[node].get_replicas() } - /// Obtain a `ViewBuilder` that can be sent to a client and then used to query a given - /// (already maintained) reader node called `name`. - pub fn view_builder(&self, name: &str) -> Option { + /// Obtain a list of `ViewBuilder`s that can be sent to a client. Each `ViewBuilder` + /// corresponds to a single replica, which can be used to query a given (already maintained) + /// reader node called `name`, `name_r1`, `name_r2`, etc. + pub fn view_builder(&self, name: &str) -> Vec { // first try to resolve the node via the recipe, which handles aliasing between identical // queries. let node = match self.recipe.node_addr_for(name) { - Ok(ni) => ni, + Ok(ni) => Some(ni), Err(_) => { // if the recipe doesn't know about this query, traverse the graph. // we need this do deal with manually constructed graphs (e.g., in tests). - *self.outputs().get(name)? + self.outputs().get(name).map(|ni| *ni) } }; - self.find_view_for(node).map(|r| { - let domain = self.ingredients[r].domain(); - let columns = self.ingredients[r].fields().to_vec(); - let shards = (0..self.domains[&domain].shards()) - .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) - .collect(); - - ViewBuilder { - local_ports: vec![], - node: r, - columns, - shards, - } - }) + if node.is_none() { + return Vec::new(); + } + + let node = node.unwrap(); + self.find_views_for(node) + .iter() + .map(|r| { + let domain = self.ingredients[*r].domain(); + let columns = self.ingredients[*r].fields().to_vec(); + let shards = (0..self.domains[&domain].shards()) + .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) + .collect(); + + ViewBuilder { + local_ports: vec![], + node: *r, + columns, + shards, + } + }) + .collect::>() } /// Obtain a TableBuild that can be used to construct a Table to perform writes and deletes @@ -720,7 +724,7 @@ impl ControllerInner { let uid = &[uid]; if context.get("group").is_none() { for g in groups { - let rgb: Option = self.view_builder(&g); + let rgb: Option = self.view_builder(&g).get(0).map(|v| v.clone()); let mut view = rgb.map(|rgb| rgb.build_exclusive().unwrap()).unwrap(); let my_groups: Vec = view .lookup(uid, true) From 70f6ccb6436a684b7bf61cfee957fd01f0219914 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 25 Sep 2018 08:55:53 -0400 Subject: [PATCH 09/41] Assign readers a replica index --- dataflow/src/node/mod.rs | 4 ++-- dataflow/src/node/special/reader.rs | 14 +++++++------- src/controller/migrate/assignment.rs | 8 +++++--- src/controller/migrate/mod.rs | 4 ++-- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/dataflow/src/node/mod.rs b/dataflow/src/node/mod.rs index 1963c6144..539833418 100644 --- a/dataflow/src/node/mod.rs +++ b/dataflow/src/node/mod.rs @@ -352,8 +352,8 @@ impl Node { &self.replicas[..] } - pub fn is_replica(&self) -> bool { - self.with_reader(|r| r.is_replica()).unwrap_or(false) + pub fn replica_index(&self) -> Option { + self.with_reader(|r| r.replica_index()).unwrap_or(None) } } diff --git a/dataflow/src/node/special/reader.rs b/dataflow/src/node/special/reader.rs index 80fc6dfec..6ea0cd157 100644 --- a/dataflow/src/node/special/reader.rs +++ b/dataflow/src/node/special/reader.rs @@ -35,7 +35,7 @@ pub struct Reader { streamers: Vec>>, for_node: NodeIndex, - is_replica: bool, + replica_index: Option, state: Option>, } @@ -47,19 +47,19 @@ impl Clone for Reader { streamers: self.streamers.clone(), state: self.state.clone(), for_node: self.for_node, - is_replica: self.is_replica, + replica_index: self.replica_index, } } } impl Reader { - pub fn new(for_node: NodeIndex, is_replica: bool) -> Self { + pub fn new(for_node: NodeIndex, replica_index: Option) -> Self { Reader { writer: None, streamers: Vec::new(), state: None, for_node, - is_replica, + replica_index, } } @@ -69,8 +69,8 @@ impl Reader { self.for_node } - pub fn is_replica(&self) -> bool { - self.is_replica + pub fn replica_index(&self) -> Option { + self.replica_index } #[allow(dead_code)] @@ -89,7 +89,7 @@ impl Reader { streamers: mem::replace(&mut self.streamers, Vec::new()), state: self.state.clone(), for_node: self.for_node, - is_replica: self.is_replica, + replica_index: self.replica_index, } } diff --git a/src/controller/migrate/assignment.rs b/src/controller/migrate/assignment.rs index 2d7f51ac4..26e6fcb0f 100644 --- a/src/controller/migrate/assignment.rs +++ b/src/controller/migrate/assignment.rs @@ -110,9 +110,11 @@ pub fn assign( }; } - if n.is_replica() { - // replicas are always in their own domain to distribute the load. - return next_domain(); + // replicas are always in their own domain to distribute the load. + if let Some(index) = n.replica_index() { + if index > 0 { + return next_domain(); + } } if graph[node].name().starts_with("BOUNDARY_") { diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index b0336e5c8..f31ab69db 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -216,8 +216,8 @@ impl<'a> Migration<'a> { // make a reader and its replicas let mut readers = vec![]; for i in 0..NUM_READER_REPLICAS { - let is_replica = i > 0; - let r = node::special::Reader::new(n, is_replica); + // TODO: replica index is None if there is exactly one replica + let r = node::special::Reader::new(n, Some(i)); let r = if let Some(name) = name.clone() { if i == 0 { self.mainline.ingredients[n].named_mirror(r, name) From 119a1405ff275672ee24950eae913f9338436cc7 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 25 Sep 2018 11:00:48 -0400 Subject: [PATCH 10/41] Assign replica domains to different workers maybe --- src/controller/migrate/mod.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index f31ab69db..be55ad0d1 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -355,8 +355,31 @@ impl<'a> Migration<'a> { } } let swapped = swapped0; - let mut sorted_new = new.iter().collect::>(); + + // First sort all the non replica nodes. + let mut new_non_replicas = HashSet::new(); + let mut replica_map = HashMap::new(); + for &ni in new.iter() { + if let Some(for_node) = mainline.ingredients[ni] + .with_reader(|r| Some(r.is_for())) + .unwrap_or(None) { + if let Some(_) = mainline.ingredients[ni].replica_index() { + if !replica_map.contains_key(&for_node) { + replica_map.insert(for_node, HashSet::new()); + } + replica_map.get_mut(&for_node).unwrap().insert(ni); + } + } else { + new_non_replicas.insert(ni); + } + } + + // Then group all the replica readers together. + let mut sorted_new = new_non_replicas.iter().collect::>(); sorted_new.sort(); + for (_, readers) in &mut replica_map { + sorted_new.append(&mut readers.iter().collect::>()); + } // Find all nodes for domains that have changed let changed_domains: HashSet = sorted_new From f995181d8cdf8c149629911777e5b904794b33e2 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Wed, 26 Sep 2018 18:05:45 -0400 Subject: [PATCH 11/41] Refactor local address assignment --- src/controller/migrate/mod.rs | 131 ++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 62 deletions(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index be55ad0d1..ec4aa54a0 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -268,6 +268,74 @@ impl<'a> Migration<'a> { } } + fn assign_local_addresses( + mainline: &'a mut ControllerInner, + log: &slog::Logger, + domain_new_nodes: &mut HashMap>, + swapped: &HashMap<(NodeIndex, NodeIndex), NodeIndex>) { + for (domain, nodes) in &mut domain_new_nodes.iter() { + // Number of pre-existing nodes + let mut nnodes = mainline.remap.get(domain).map(HashMap::len).unwrap_or(0); + + if nodes.is_empty() { + // Nothing to do here + continue; + } + + let log = log.new(o!("domain" => domain.index())); + + // Give local addresses to every (new) node + for &ni in nodes.iter() { + debug!(log, + "assigning local index"; + "type" => format!("{:?}", mainline.ingredients[ni]), + "node" => ni.index(), + "local" => nnodes + ); + + let mut ip: IndexPair = ni.into(); + ip.set_local(unsafe { LocalNodeIndex::make(nnodes as u32) }); + mainline.ingredients[ni].set_finalized_addr(ip); + mainline + .remap + .entry(*domain) + .or_insert_with(HashMap::new) + .insert(ni, ip); + nnodes += 1; + } + + // Initialize each new node + for &ni in nodes.iter() { + if mainline.ingredients[ni].is_internal() { + // Figure out all the remappings that have happened + // NOTE: this has to be *per node*, since a shared parent may be remapped + // differently to different children (due to sharding for example). we just + // allocate it once though. + let mut remap = mainline.remap[domain].clone(); + + // Parents in other domains have been swapped for ingress nodes. + // Those ingress nodes' indices are now local. + for (&(dst, src), &instead) in swapped.iter() { + if dst != ni { + // ignore mappings for other nodes + continue; + } + + let old = remap.insert(src, mainline.remap[domain][&instead]); + assert_eq!(old, None); + } + + trace!(log, "initializing new node"; "node" => ni.index()); + mainline + .ingredients + .node_weight_mut(ni) + .unwrap() + .on_commit(&remap); + } + } + } + } + /// Commit the changes introduced by this `Migration` to the master `Soup`. /// /// This will spin up an execution thread for each new thread domain, and hook those new @@ -399,68 +467,7 @@ impl<'a> Migration<'a> { }); // Assign local addresses to all new nodes, and initialize them - for (domain, nodes) in &mut domain_new_nodes { - // Number of pre-existing nodes - let mut nnodes = mainline.remap.get(domain).map(HashMap::len).unwrap_or(0); - - if nodes.is_empty() { - // Nothing to do here - continue; - } - - let log = log.new(o!("domain" => domain.index())); - - // Give local addresses to every (new) node - for &ni in nodes.iter() { - debug!(log, - "assigning local index"; - "type" => format!("{:?}", mainline.ingredients[ni]), - "node" => ni.index(), - "local" => nnodes - ); - - let mut ip: IndexPair = ni.into(); - ip.set_local(unsafe { LocalNodeIndex::make(nnodes as u32) }); - mainline.ingredients[ni].set_finalized_addr(ip); - mainline - .remap - .entry(*domain) - .or_insert_with(HashMap::new) - .insert(ni, ip); - nnodes += 1; - } - - // Initialize each new node - for &ni in nodes.iter() { - if mainline.ingredients[ni].is_internal() { - // Figure out all the remappings that have happened - // NOTE: this has to be *per node*, since a shared parent may be remapped - // differently to different children (due to sharding for example). we just - // allocate it once though. - let mut remap = mainline.remap[domain].clone(); - - // Parents in other domains have been swapped for ingress nodes. - // Those ingress nodes' indices are now local. - for (&(dst, src), &instead) in &swapped { - if dst != ni { - // ignore mappings for other nodes - continue; - } - - let old = remap.insert(src, mainline.remap[domain][&instead]); - assert_eq!(old, None); - } - - trace!(log, "initializing new node"; "node" => ni.index()); - mainline - .ingredients - .node_weight_mut(ni) - .unwrap() - .on_commit(&remap); - } - } - } - + Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_nodes, &swapped); if let Some(shards) = mainline.sharding { sharding::validate(&log, &mainline.ingredients, mainline.source, &new, shards) }; From db1afbbffe105452b78c9d8f1840a18d28e039e8 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Thu, 27 Sep 2018 02:00:41 -0400 Subject: [PATCH 12/41] Refactor round robin worker assignment method --- src/controller/migrate/mod.rs | 71 +++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index ec4aa54a0..9ddf7b6c5 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -336,6 +336,45 @@ impl<'a> Migration<'a> { } } + /// Places the domains and their nodes on workers according to the round robin iterator. This + /// is used to ensure, for example, that replicas of the same reader and shards of the same + /// node end up on different workers. It is also currently used to approximately distribute + /// domains across the remaining workers equally. In the future, we'd like to place the domain + /// on the machine with the fewest domains already assigned. This does not take into account + /// replicas and shards that were NOT created for their very first time. + fn place_round_robin( + mainline: &'a mut ControllerInner, + log: &slog::Logger, + mut placer: &'a mut Box>, + mut workers: &'a mut Vec, + uninformed_domain_nodes: &mut HashMap>, + changed_domains: &HashSet) { + for domain in changed_domains { + if mainline.domains.contains_key(&domain) { + // this is not a new domain + continue; + } + + let nodes = uninformed_domain_nodes.remove(&domain).unwrap(); + let d = DomainHandle::new( + *domain, + mainline.ingredients[nodes[0].0].sharded_by().shards(), + &log, + &mut mainline.ingredients, + &mainline.domain_config, + nodes, + &mainline.persistence, + &mainline.listen_addr, + &mainline.channel_coordinator, + &mainline.debug_channel, + &mut placer, + &mut workers, + mainline.epoch, + ); + mainline.domains.insert(*domain, d); + } + } + /// Commit the changes introduced by this `Migration` to the master `Soup`. /// /// This will spin up an execution thread for each new thread domain, and hook those new @@ -522,30 +561,14 @@ impl<'a> Migration<'a> { // Boot up new domains (they'll ignore all updates for now) debug!(log, "booting new domains"); - for domain in changed_domains { - if mainline.domains.contains_key(&domain) { - // this is not a new domain - continue; - } - - let nodes = uninformed_domain_nodes.remove(&domain).unwrap(); - let d = DomainHandle::new( - domain, - mainline.ingredients[nodes[0].0].sharded_by().shards(), - &log, - &mut mainline.ingredients, - &mainline.domain_config, - nodes, - &mainline.persistence, - &mainline.listen_addr, - &mainline.channel_coordinator, - &mainline.debug_channel, - &mut placer, - &mut workers, - mainline.epoch, - ); - mainline.domains.insert(domain, d); - } + Self::place_round_robin( + mainline, + &log, + &mut placer, + &mut workers, + &mut uninformed_domain_nodes, + &changed_domains, + ); // Add any new nodes to existing domains (they'll also ignore all updates for now) debug!(log, "mutating existing domains"); From 1534991ccb3d6453e3ca729e41ee70bfaef82562 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Thu, 27 Sep 2018 15:15:02 -0400 Subject: [PATCH 13/41] Put the first reader in a separate domain as well --- src/controller/inner.rs | 18 +++++++++--------- src/controller/migrate/assignment.rs | 6 ++---- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/controller/inner.rs b/src/controller/inner.rs index 4e00f5922..737a75d0f 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -956,19 +956,14 @@ impl ControllerInner { nodes.push(reader); } } else { - unreachable!(); + // The reader had replicas, which were all removed along with the egress node. } } - // If the start node didn't have any children, it can be removed immediately - if nodes.is_empty() { - nodes.push(leaf); - } - - // If there's an egress node, that means the reader replicas are on different domains. - // Remove all those leaf nodes as well. match egress_node { Some(ni) => { + // If there's an egress node, that means the reader replicas are on different + // domains. Remove all those leaf nodes as well. let mut bfs = Bfs::new(&self.ingredients, ni); while let Some(child) = bfs.next(&self.ingredients) { if self.ingredients @@ -980,7 +975,12 @@ impl ControllerInner { } } }, - None => {}, + None => { + // If the start node didn't have any children, it can be removed immediately + if nodes.is_empty() { + nodes.push(leaf); + } + }, } // The nodes we remove first do not have children any more diff --git a/src/controller/migrate/assignment.rs b/src/controller/migrate/assignment.rs index 26e6fcb0f..e544dc77d 100644 --- a/src/controller/migrate/assignment.rs +++ b/src/controller/migrate/assignment.rs @@ -111,10 +111,8 @@ pub fn assign( } // replicas are always in their own domain to distribute the load. - if let Some(index) = n.replica_index() { - if index > 0 { - return next_domain(); - } + if let Some(_) = n.replica_index() { + return next_domain(); } if graph[node].name().starts_with("BOUNDARY_") { From a35748b8d7cb24a41af174418450ca5e2f9e197b Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Thu, 27 Sep 2018 16:02:35 -0400 Subject: [PATCH 14/41] Assign replicas and non-replicas to workers separately --- src/controller/migrate/mod.rs | 75 ++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index 9ddf7b6c5..07ec39d1a 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -342,13 +342,15 @@ impl<'a> Migration<'a> { /// domains across the remaining workers equally. In the future, we'd like to place the domain /// on the machine with the fewest domains already assigned. This does not take into account /// replicas and shards that were NOT created for their very first time. + /// + /// Domains are placed round robin in the order that they are provided. fn place_round_robin( mainline: &'a mut ControllerInner, log: &slog::Logger, mut placer: &'a mut Box>, mut workers: &'a mut Vec, uninformed_domain_nodes: &mut HashMap>, - changed_domains: &HashSet) { + changed_domains: &Vec) { for domain in changed_domains { if mainline.domains.contains_key(&domain) { // this is not a new domain @@ -463,13 +465,16 @@ impl<'a> Migration<'a> { } let swapped = swapped0; - // First sort all the non replica nodes. - let mut new_non_replicas = HashSet::new(); + // First bucket the replica and non-replica nodes. + // TODO(ygina): There's a lot of duplicated ops and it's kinda ugly how we later manage + // to group the reader node with its ingress node. + let mut new_non_replicas_set = HashSet::new(); let mut replica_map = HashMap::new(); for &ni in new.iter() { if let Some(for_node) = mainline.ingredients[ni] .with_reader(|r| Some(r.is_for())) .unwrap_or(None) { + // This is an ingress node to a reader replica. if let Some(_) = mainline.ingredients[ni].replica_index() { if !replica_map.contains_key(&for_node) { replica_map.insert(for_node, HashSet::new()); @@ -477,25 +482,29 @@ impl<'a> Migration<'a> { replica_map.get_mut(&for_node).unwrap().insert(ni); } } else { - new_non_replicas.insert(ni); + new_non_replicas_set.insert(ni); } } - // Then group all the replica readers together. - let mut sorted_new = new_non_replicas.iter().collect::>(); - sorted_new.sort(); + // TODO(ygina): why was this sorted before? + let new_non_replicas = new_non_replicas_set.iter().collect::>(); + let mut new_replicas = Vec::new(); for (_, readers) in &mut replica_map { - sorted_new.append(&mut readers.iter().collect::>()); + new_replicas.append(&mut readers.iter().collect::>()); } - // Find all nodes for domains that have changed - let changed_domains: HashSet = sorted_new + let mut domain_new_non_replicas = new_non_replicas .iter() + .filter(|&&&ni| ni != mainline.source) .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) - .map(|&&ni| mainline.ingredients[ni].domain()) - .collect(); + .map(|&&ni| (mainline.ingredients[ni].domain(), ni)) + .fold(HashMap::new(), |mut dns, (d, ni)| { + debug!(log, "pushing {} {}", d.index(), ni.index()); + dns.entry(d).or_insert_with(Vec::new).push(ni); + dns + }); - let mut domain_new_nodes = sorted_new + let mut domain_new_replicas = new_replicas .iter() .filter(|&&&ni| ni != mainline.source) .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) @@ -505,8 +514,36 @@ impl<'a> Migration<'a> { dns }); + for (domain, nodes) in &mut domain_new_replicas { + // Each replica (an ingress node and the reader node) should be in its own domain. + // This domain should be new, and no other nodes should be placed in this domain. + // Move the reader node from non replicas to be with the ingress node in replicas. + let mut reader = domain_new_non_replicas.remove(&domain).unwrap(); + assert_eq!(reader.len(), 1); + assert_eq!(nodes.len(), 1); + assert!(!mainline.domains.contains_key(&domain)); + nodes.append(&mut reader); + } + + // Find all nodes for domains that have changed + let changed_domains_non_replicas: Vec = new_non_replicas + .iter() + .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) + .map(|&&ni| mainline.ingredients[ni].domain()) + .collect::>() + .into_iter() + .collect::>(); + let changed_domains_replicas: Vec = new_replicas + .iter() + .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) + .map(|&&ni| mainline.ingredients[ni].domain()) + .collect::>() + .into_iter() + .collect::>(); + // Assign local addresses to all new nodes, and initialize them - Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_nodes, &swapped); + Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_non_replicas, &swapped); + Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_replicas, &swapped); if let Some(shards) = mainline.sharding { sharding::validate(&log, &mainline.ingredients, mainline.source, &new, shards) }; @@ -567,7 +604,15 @@ impl<'a> Migration<'a> { &mut placer, &mut workers, &mut uninformed_domain_nodes, - &changed_domains, + &changed_domains_replicas, + ); + Self::place_round_robin( + mainline, + &log, + &mut placer, + &mut workers, + &mut uninformed_domain_nodes, + &changed_domains_non_replicas, ); // Add any new nodes to existing domains (they'll also ignore all updates for now) From 04bd3f7a0e741023d6e6d701f7725a54f552ebcc Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Fri, 28 Sep 2018 14:38:31 -0400 Subject: [PATCH 15/41] Clean up assignment code a bunch, though it still panics sometimes --- src/controller/migrate/mod.rs | 148 +++++++++++++++++----------------- 1 file changed, 73 insertions(+), 75 deletions(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index 07ec39d1a..762f30967 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -465,85 +465,17 @@ impl<'a> Migration<'a> { } let swapped = swapped0; - // First bucket the replica and non-replica nodes. - // TODO(ygina): There's a lot of duplicated ops and it's kinda ugly how we later manage - // to group the reader node with its ingress node. - let mut new_non_replicas_set = HashSet::new(); - let mut replica_map = HashMap::new(); - for &ni in new.iter() { - if let Some(for_node) = mainline.ingredients[ni] - .with_reader(|r| Some(r.is_for())) - .unwrap_or(None) { - // This is an ingress node to a reader replica. - if let Some(_) = mainline.ingredients[ni].replica_index() { - if !replica_map.contains_key(&for_node) { - replica_map.insert(for_node, HashSet::new()); - } - replica_map.get_mut(&for_node).unwrap().insert(ni); - } - } else { - new_non_replicas_set.insert(ni); - } - } - - // TODO(ygina): why was this sorted before? - let new_non_replicas = new_non_replicas_set.iter().collect::>(); - let mut new_replicas = Vec::new(); - for (_, readers) in &mut replica_map { - new_replicas.append(&mut readers.iter().collect::>()); - } - - let mut domain_new_non_replicas = new_non_replicas + // Assign local addresses to all new nodes, and initialize them. + let mut domain_new_nodes = new .iter() - .filter(|&&&ni| ni != mainline.source) - .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) - .map(|&&ni| (mainline.ingredients[ni].domain(), ni)) + .filter(|&ni| *ni != mainline.source) + .filter(|&ni| !mainline.ingredients[*ni].is_dropped()) + .map(|&ni| (mainline.ingredients[ni].domain(), ni)) .fold(HashMap::new(), |mut dns, (d, ni)| { - debug!(log, "pushing {} {}", d.index(), ni.index()); dns.entry(d).or_insert_with(Vec::new).push(ni); dns }); - - let mut domain_new_replicas = new_replicas - .iter() - .filter(|&&&ni| ni != mainline.source) - .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) - .map(|&&ni| (mainline.ingredients[ni].domain(), ni)) - .fold(HashMap::new(), |mut dns, (d, ni)| { - dns.entry(d).or_insert_with(Vec::new).push(ni); - dns - }); - - for (domain, nodes) in &mut domain_new_replicas { - // Each replica (an ingress node and the reader node) should be in its own domain. - // This domain should be new, and no other nodes should be placed in this domain. - // Move the reader node from non replicas to be with the ingress node in replicas. - let mut reader = domain_new_non_replicas.remove(&domain).unwrap(); - assert_eq!(reader.len(), 1); - assert_eq!(nodes.len(), 1); - assert!(!mainline.domains.contains_key(&domain)); - nodes.append(&mut reader); - } - - // Find all nodes for domains that have changed - let changed_domains_non_replicas: Vec = new_non_replicas - .iter() - .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) - .map(|&&ni| mainline.ingredients[ni].domain()) - .collect::>() - .into_iter() - .collect::>(); - let changed_domains_replicas: Vec = new_replicas - .iter() - .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) - .map(|&&ni| mainline.ingredients[ni].domain()) - .collect::>() - .into_iter() - .collect::>(); - - // Assign local addresses to all new nodes, and initialize them - Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_non_replicas, &swapped); - Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_replicas, &swapped); + Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_nodes, &swapped); if let Some(shards) = mainline.sharding { sharding::validate(&log, &mainline.ingredients, mainline.source, &new, shards) }; @@ -596,6 +528,72 @@ impl<'a> Migration<'a> { let mut placer: Box> = Box::new(placer_workers.into_iter().cycle()); + // Bucket reader replica nodes and the non-replica nodes separately. + let mut replica_map = HashMap::new(); + let mut other_nodes = HashSet::new(); + for &ni in new + .iter() + .filter(|&&ni| ni != mainline.source) + .filter(|&&ni| !mainline.ingredients[ni].is_dropped()) + .into_iter() { + // Check if the node is a reader, and then insert it in the replica map. The value + // is a list of readers that are for the same node, and the key is the node the + // readers are for. Later, we will also add the associated ingress node to the replica + // map, since readers are all located on their own domains. + if let Some(for_node) = mainline.ingredients[ni] + .with_reader(|r| Some(r.is_for())) + .unwrap_or(None) { + if mainline.ingredients[ni].replica_index().is_some() { + replica_map.entry(for_node).or_insert(HashSet::new()); + replica_map.get_mut(&for_node).unwrap().insert(ni); + } + } else { + other_nodes.insert(ni); + } + } + + // Obtain a vec of DomainIndexes in the order that we want to assign them to workers. + // For replicas, all replicas for the same reader should be consecutive. For non-replicas, + // it doesn't really matter unless we want the ordering to be deterministic. + let mut changed_domains_replicas = Vec::new(); + for (_, readers) in &mut replica_map { + let mut domains = readers + .iter() + .filter(|&ni| !mainline.ingredients[*ni].is_dropped()) + .map(|&ni| mainline.ingredients[ni].domain()) + .collect::>(); + changed_domains_replicas.append(&mut domains); + } + let changed_domains_other = other_nodes + .iter() + .filter(|&ni| !mainline.ingredients[*ni].is_dropped()) + .map(|&ni| mainline.ingredients[ni].domain()) + .collect::>() + .into_iter() + .filter(|&domain| !changed_domains_replicas.contains(&domain)) + .collect::>(); + + // Check invariants on the (non)-replica data structures. Each changed replica domain + // should be just created. The domain should contain the reader node and its ingress node, + // and no other nodes. Also, each new reader node should be in a unique domain. + assert_eq!( + changed_domains_replicas.len(), + changed_domains_replicas.iter().collect::>().len() + ); + for domain in &changed_domains_replicas { + assert!(!mainline.domains.contains_key(&domain)); + let nodes: &Vec<_> = uninformed_domain_nodes.get(&domain).unwrap(); + assert_eq!(nodes.len(), 2); + let (ni_a, _) = nodes.get(0).unwrap(); + let (ni_b, _) = nodes.get(1).unwrap(); + assert!( + (mainline.ingredients[*ni_a].is_reader() + && mainline.ingredients[*ni_b].is_ingress()) + || (mainline.ingredients[*ni_a].is_ingress() + && mainline.ingredients[*ni_b].is_reader()) + ); + } + // Boot up new domains (they'll ignore all updates for now) debug!(log, "booting new domains"); Self::place_round_robin( @@ -612,7 +610,7 @@ impl<'a> Migration<'a> { &mut placer, &mut workers, &mut uninformed_domain_nodes, - &changed_domains_non_replicas, + &changed_domains_other, ); // Add any new nodes to existing domains (they'll also ignore all updates for now) From 837b1947b6b666a5a4cc7f1a1949be6296322ce9 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Fri, 28 Sep 2018 18:32:04 -0400 Subject: [PATCH 16/41] Fix already mutable borrowed panic --- src/controller/migrate/mod.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index 8f3e9d0ba..dd7a8a5ec 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -465,11 +465,14 @@ impl<'a> Migration<'a> { let swapped = swapped0; // Assign local addresses to all new nodes, and initialize them. - let mut domain_new_nodes = new + let mut sorted_new = new.iter().collect::>(); + sorted_new.sort(); + + let mut domain_new_nodes = sorted_new .iter() - .filter(|&ni| *ni != mainline.source) - .filter(|&ni| !mainline.ingredients[*ni].is_dropped()) - .map(|&ni| (mainline.ingredients[ni].domain(), ni)) + .filter(|&&&ni| ni != mainline.source) + .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) + .map(|&&ni| (mainline.ingredients[ni].domain(), ni)) .fold(HashMap::new(), |mut dns, (d, ni)| { dns.entry(d).or_insert_with(Vec::new).push(ni); dns @@ -530,10 +533,10 @@ impl<'a> Migration<'a> { // Bucket reader replica nodes and the non-replica nodes separately. let mut replica_map = HashMap::new(); let mut other_nodes = HashSet::new(); - for &ni in new + for &&ni in sorted_new .iter() - .filter(|&&ni| ni != mainline.source) - .filter(|&&ni| !mainline.ingredients[ni].is_dropped()) + .filter(|&&&ni| ni != mainline.source) + .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) .into_iter() { // Check if the node is a reader, and then insert it in the replica map. The value // is a list of readers that are for the same node, and the key is the node the From 85c62785f43ffe12aba0ba0c4c909ab87fc899a6 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Mon, 1 Oct 2018 11:49:20 -0400 Subject: [PATCH 17/41] Replica is not in distinct domain if there is only 1 --- src/controller/migrate/mod.rs | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index 076f51895..c6ab6958e 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -211,13 +211,20 @@ impl<'a> Migration<'a> { self.mainline.graph() } - fn ensure_reader_for(&mut self, n: NodeIndex, name: Option) { + fn ensure_reader_for(&mut self, n: NodeIndex, name: Option, num_replicas: usize) { if !self.readers.contains_key(&n) { - // make a reader and its replicas let mut readers = vec![]; - for i in 0..NUM_READER_REPLICAS { - // TODO: replica index is None if there is exactly one replica - let r = node::special::Reader::new(n, Some(i)); + for i in 0..num_replicas { + let replica_i = { + if num_replicas == 0 { + None + } else { + Some(i) + } + }; + + // Make a reader node + let r = node::special::Reader::new(n, replica_i); let r = if let Some(name) = name.clone() { if i == 0 { self.mainline.ingredients[n].named_mirror(r, name) @@ -227,11 +234,20 @@ impl<'a> Migration<'a> { } else { self.mainline.ingredients[n].mirror(r) }; + + // Add it to the graph along with an edge to the node it reads for let r = self.mainline.ingredients.add_node(r); self.mainline.ingredients.add_edge(n, r, ()); self.mainline.ingredients[n].add_replica(r); + debug!(self.log, + "adding reader node"; + "node" => r.index(), + "for_node" => n.index(), + "replica_index" => replica_i + ); readers.push(r); } + self.readers.insert(n, readers); } } @@ -241,7 +257,7 @@ impl<'a> Migration<'a> { /// To query into the maintained state, use `ControllerInner::get_getter`. #[cfg(test)] pub fn maintain_anonymous(&mut self, n: NodeIndex, key: &[usize]) -> Vec { - self.ensure_reader_for(n, None); + self.ensure_reader_for(n, None, NUM_READER_REPLICAS); let ris = &self.readers[&n]; for ri in ris { @@ -257,7 +273,7 @@ impl<'a> Migration<'a> { /// /// To query into the maintained state, use `ControllerInner::get_getter`. pub fn maintain(&mut self, name: String, n: NodeIndex, key: &[usize]) { - self.ensure_reader_for(n, Some(name)); + self.ensure_reader_for(n, Some(name), NUM_READER_REPLICAS); let ris = &self.readers[&n]; From cb1cf8b19b3b0083867b1e44eb4d33540183bff0 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Mon, 1 Oct 2018 12:31:18 -0400 Subject: [PATCH 18/41] Logging and clarifying comments --- src/controller/inner.rs | 45 +++++++++++++++++++++-------------- src/controller/migrate/mod.rs | 6 +++++ 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/controller/inner.rs b/src/controller/inner.rs index 4035c83c4..3087b4aeb 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -798,7 +798,7 @@ impl ControllerInner { topo_removals.reverse(); for leaf in topo_removals { - self.remove_leaf(leaf)?; + self.remove_query_node(leaf)?; } // now remove bases @@ -907,7 +907,7 @@ impl ControllerInner { graphviz(&self.ingredients, &self.materializations) } - fn remove_leaf(&mut self, leaf: NodeIndex) -> Result<(), String> { + fn remove_query_node(&mut self, leaf: NodeIndex) -> Result<(), String> { let mut removals = vec![]; let start = leaf; assert!(!self.ingredients[leaf].is_source()); @@ -926,38 +926,37 @@ impl ControllerInner { .count() > 0 { - // This query leaf node has children -- typically, these are readers, but they can also - // include egress nodes or other, dependent queries. - let mut has_non_reader_children = false; + // This query leaf node has children -- typically, these are readers, but they can + // also be egress nodes or other, dependent queries. If the reader has replicas, we + // are looking for a single egress node that connects the leaf to replicas in other + // domains. + let mut has_other_children = false; let readers: Vec<_> = self .ingredients .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) - .filter(|ni| { - if self.ingredients[*ni].is_reader() { + .filter(|&ni| { + if self.ingredients[ni].is_reader() { true } else { - if egress_node.is_some() || !self.ingredients[*ni].is_egress() { - has_non_reader_children = true; + if egress_node.is_some() || !self.ingredients[ni].is_egress() { + has_other_children = true; } else { - egress_node = Some(ni.clone()); + egress_node = Some(ni); } false } }) .collect(); - if has_non_reader_children { - // The leaf node can only have non-reader children if its reader has replicas. - // Then the non-reader child is a single egress node that connects the leaf to - // replicas in other domains. Otherwise impossible, since we remove nodes in - // reverse topological order. + if has_other_children { + // should never happen, since we remove nodes in reverse topological order crit!( self.log, - "not removing node {} yet, as it still has non-reader children", + "not removing node {} yet, as it still has non-reader-related children", leaf.index() ); unreachable!(); } - // nodes can only have one reader + // nodes can have only one reader attached assert!(readers.len() <= 1); debug!( self.log, @@ -968,9 +967,14 @@ impl ControllerInner { for reader in readers { removals.push(reader); nodes.push(reader); + debug!( + self.log, "Removing reader"; + "node" => reader.index(), + "leaf" => leaf.index(), + ); } } else { - // The reader had replicas, which were all removed along with the egress node. + // The reader had replicas, which will all be removed along with the egress node. } } @@ -988,6 +992,11 @@ impl ControllerInner { nodes.push(child); } } + debug!( + self.log, "Removing egress node and its children"; + "node" => ni.index(), + "leaf" => leaf.index(), + ); }, None => { // If the start node didn't have any children, it can be removed immediately diff --git a/src/controller/migrate/mod.rs b/src/controller/migrate/mod.rs index c6ab6958e..83229d1a2 100644 --- a/src/controller/migrate/mod.rs +++ b/src/controller/migrate/mod.rs @@ -590,6 +590,12 @@ impl<'a> Migration<'a> { .into_iter() .filter(|&domain| !changed_domains_replicas.contains(&domain)) .collect::>(); + debug!( + log, + "found changed domains"; + "replica domains" => format!("{:?}", changed_domains_replicas), + "other domains" => format!("{:?}", changed_domains_other), + ); // Check invariants on the (non)-replica data structures. Each changed replica domain // should be just created. The domain should contain the reader node and its ingress node, From 6e54552311ff698413c204dd8a1f4580595d2ec7 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Mon, 1 Oct 2018 13:03:18 -0400 Subject: [PATCH 19/41] Test to query any replica --- src/integration.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/integration.rs b/src/integration.rs index 0118e39b2..f427dfa37 100644 --- a/src/integration.rs +++ b/src/integration.rs @@ -434,6 +434,61 @@ fn it_works_w_partial_mat_below_empty() { assert_eq!(cq.len().unwrap(), 1); } +#[test] +fn it_works_with_replicas() { + // set up graph + let mut g = build_local("it_works_with_replicas"); + let (a, b) = g.migrate(|mig| { + let a = mig.add_base("a", &["a", "b"], Base::default()); + let b = mig.add_base("b", &["a", "b"], Base::default()); + (a, b) + }); + + let mut muta = g.table("a").unwrap(); + let id: DataType = 1.into(); + + // send a few values on a + muta.insert(vec![id.clone(), 1.into()]).unwrap(); + muta.insert(vec![id.clone(), 2.into()]).unwrap(); + muta.insert(vec![id.clone(), 3.into()]).unwrap(); + + // give it some time to propagate + sleep(); + + let _ = g.migrate(move |mig| { + let mut emits = HashMap::new(); + emits.insert(a, vec![0, 1]); + emits.insert(b, vec![0, 1]); + let u = Union::new(emits); + let c = mig.add_ingredient("c", &["a", "b"], u); + mig.maintain_anonymous(c, &[0]); + c + }); + + // give it some time to propagate + sleep(); + + let cqs = g.view_replicas("c").unwrap(); + + // should have multiple replicas + assert!(cqs.len() > 1); + + for mut cq in cqs { + // because the reader is partial, we should have no key until we read + assert_eq!(cq.len().unwrap(), 0); + + // now do some reads + let res = cq.lookup(&[id.clone()], true).unwrap(); + assert_eq!(res.len(), 3); + assert!(res.iter().any(|r| r == &vec![id.clone(), 1.into()])); + assert!(res.iter().any(|r| r == &vec![id.clone(), 2.into()])); + assert!(res.iter().any(|r| r == &vec![id.clone(), 3.into()])); + + // should have one key in the reader now + assert_eq!(cq.len().unwrap(), 1); + } +} + #[test] fn it_works_deletion() { // set up graph From 8853aee5a902cb747ebe818e6d6bfda8d1b501f5 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Wed, 3 Oct 2018 15:31:55 -0400 Subject: [PATCH 20/41] View api returns view replicas in round robin --- api/src/controller.rs | 35 +++++++---------------- dataflow/src/node/mod.rs | 13 +++++++++ src/controller/inner.rs | 62 +++++++++++++++++----------------------- src/integration.rs | 55 ----------------------------------- 4 files changed, 49 insertions(+), 116 deletions(-) diff --git a/api/src/controller.rs b/api/src/controller.rs index 275be02a6..295a0152f 100644 --- a/api/src/controller.rs +++ b/api/src/controller.rs @@ -204,43 +204,28 @@ impl ControllerHandle { self.rpc("outputs", &()) } - /// Obtain a `View` that allows you to query the given external view. - /// The `View` is of an arbitrary replica. + /// Obtain a `View` that allows you to query the given external view. If the view has + /// replicas, returns a `View` in round robin order each time the api is called. pub fn view(&mut self, name: &str) -> Result { - let replicas = self.view_replicas(name)?; - match replicas.get(0) { - Some(r) => Ok(r.clone()), - None => Err(format_err!("view {} does not exist", name)), - } - } - - /// Obtain a list of `View`s that allow you to query the given external view. - /// Each view corresponds to a single reader replica. - pub fn view_replicas(&mut self, name: &str) -> Result, failure::Error> { // This call attempts to detect if this function is being called in a loop. If this // is getting false positives, then it is safe to increase the allowed hit count. #[cfg(debug_assertions)] assert_infrequent::at_most(200); - let view_builders = self.rpc::<_, Vec>("view_builder", name) + let mut g = self.rpc::<_, ViewBuilder>("view_builder", name) .context(format!("building View for {}", name))?; - let mut views = Vec::new(); - for mut g in view_builders { - if let Some(port) = self.local_port { - g = g.with_local_port(port); - } - - let g = g.build(&mut self.views)?; + if let Some(port) = self.local_port { + g = g.with_local_port(port); + } - if self.local_port.is_none() { - self.local_port = Some(g.local_addr().unwrap().port()); - } + let g = g.build(&mut self.views)?; - views.push(g); + if self.local_port.is_none() { + self.local_port = Some(g.local_addr().unwrap().port()); } - Ok(views) + Ok(g) } /// Obtain a `Table` that allows you to perform writes, deletes, and other operations on the diff --git a/dataflow/src/node/mod.rs b/dataflow/src/node/mod.rs index 539833418..563353a29 100644 --- a/dataflow/src/node/mod.rs +++ b/dataflow/src/node/mod.rs @@ -28,6 +28,7 @@ pub struct Node { taken: bool, sharded_by: Sharding, + replica_index: usize, replicas: Vec, } @@ -51,6 +52,7 @@ impl Node { taken: false, sharded_by: Sharding::None, + replica_index: 0, replicas: Vec::new(), } } @@ -352,6 +354,17 @@ impl Node { &self.replicas[..] } + /// Returns replicas in round robin order each time the method is called. + pub fn next_replica(&mut self) -> Option { + if self.num_replicas() > 0 { + self.replica_index += 1; + self.replica_index %= self.num_replicas(); + Some(*self.replicas.get(self.replica_index).unwrap()) + } else { + None + } + } + pub fn replica_index(&self) -> Option { self.with_reader(|r| r.replica_index()).unwrap_or(None) } diff --git a/src/controller/inner.rs b/src/controller/inner.rs index 3087b4aeb..df731e71e 100644 --- a/src/controller/inner.rs +++ b/src/controller/inner.rs @@ -522,51 +522,41 @@ impl ControllerInner { .collect() } - fn find_views_for(&self, node: NodeIndex) -> &[NodeIndex] { - // reader should be a child of the given node. however, due to sharding and replication, - // it may not be an *immediate* child. furthermore, once we go beyond depth 1, we may - // accidentally hit an *unrelated* reader node. to account for this, nodes cache their - // readers so we can easily query data. - self.ingredients[node].get_replicas() - } - - /// Obtain a list of `ViewBuilder`s that can be sent to a client. Each `ViewBuilder` - /// corresponds to a single replica, which can be used to query a given (already maintained) - /// reader node called `name`, `name_r1`, `name_r2`, etc. - pub fn view_builder(&self, name: &str) -> Vec { + /// Obtain a `ViewBuilder` that can be sent to a client and then used to query a given + /// (already maintained) reader node. If the view has replicas, a `ViewBuilder` is returned + /// for each view in round robin order. + /// + /// The name of the reader node is `name`, or if it is a replica, `name_r[REPLICA_NUMBER]`. + pub fn view_builder(&mut self, name: &str) -> Option { // first try to resolve the node via the recipe, which handles aliasing between identical // queries. let node = match self.recipe.node_addr_for(name) { - Ok(ni) => Some(ni), + Ok(ni) => ni, Err(_) => { // if the recipe doesn't know about this query, traverse the graph. // we need this do deal with manually constructed graphs (e.g., in tests). - self.outputs().get(name).map(|ni| *ni) + *self.outputs().get(name)? } }; - if node.is_none() { - return Vec::new(); - } - - let node = node.unwrap(); - self.find_views_for(node) - .iter() - .map(|r| { - let domain = self.ingredients[*r].domain(); - let columns = self.ingredients[*r].fields().to_vec(); - let shards = (0..self.domains[&domain].shards()) - .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) - .collect(); + // reader should be a child of the given node. however, due to sharding and replication, + // it may not be an *immediate* child. furthermore, once we go beyond depth 1, we may + // accidentally hit an *unrelated* reader node. to account for this, nodes cache their + // readers so we can easily query data. + self.ingredients[node].next_replica().map(|ri| { + let domain = self.ingredients[ri].domain(); + let columns = self.ingredients[ri].fields().to_vec(); + let shards = (0..self.domains[&domain].shards()) + .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) + .collect(); - ViewBuilder { - local_ports: vec![], - node: *r, - columns, - shards, - } - }) - .collect::>() + ViewBuilder { + local_ports: vec![], + node: ri, + columns, + shards, + } + }) } /// Obtain a TableBuild that can be used to construct a Table to perform writes and deletes @@ -734,7 +724,7 @@ impl ControllerInner { let uid = &[uid]; if context.get("group").is_none() { for g in groups { - let rgb: Option = self.view_builder(&g).get(0).map(|v| v.clone()); + let rgb: Option = self.view_builder(&g); let mut view = rgb.map(|rgb| rgb.build_exclusive().unwrap()).unwrap(); let my_groups: Vec = view .lookup(uid, true) diff --git a/src/integration.rs b/src/integration.rs index f427dfa37..0118e39b2 100644 --- a/src/integration.rs +++ b/src/integration.rs @@ -434,61 +434,6 @@ fn it_works_w_partial_mat_below_empty() { assert_eq!(cq.len().unwrap(), 1); } -#[test] -fn it_works_with_replicas() { - // set up graph - let mut g = build_local("it_works_with_replicas"); - let (a, b) = g.migrate(|mig| { - let a = mig.add_base("a", &["a", "b"], Base::default()); - let b = mig.add_base("b", &["a", "b"], Base::default()); - (a, b) - }); - - let mut muta = g.table("a").unwrap(); - let id: DataType = 1.into(); - - // send a few values on a - muta.insert(vec![id.clone(), 1.into()]).unwrap(); - muta.insert(vec![id.clone(), 2.into()]).unwrap(); - muta.insert(vec![id.clone(), 3.into()]).unwrap(); - - // give it some time to propagate - sleep(); - - let _ = g.migrate(move |mig| { - let mut emits = HashMap::new(); - emits.insert(a, vec![0, 1]); - emits.insert(b, vec![0, 1]); - let u = Union::new(emits); - let c = mig.add_ingredient("c", &["a", "b"], u); - mig.maintain_anonymous(c, &[0]); - c - }); - - // give it some time to propagate - sleep(); - - let cqs = g.view_replicas("c").unwrap(); - - // should have multiple replicas - assert!(cqs.len() > 1); - - for mut cq in cqs { - // because the reader is partial, we should have no key until we read - assert_eq!(cq.len().unwrap(), 0); - - // now do some reads - let res = cq.lookup(&[id.clone()], true).unwrap(); - assert_eq!(res.len(), 3); - assert!(res.iter().any(|r| r == &vec![id.clone(), 1.into()])); - assert!(res.iter().any(|r| r == &vec![id.clone(), 2.into()])); - assert!(res.iter().any(|r| r == &vec![id.clone(), 3.into()])); - - // should have one key in the reader now - assert_eq!(cq.len().unwrap(), 1); - } -} - #[test] fn it_works_deletion() { // set up graph From 77913d7d9d4eee8a644a05f9995630d02614f628 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Mon, 15 Oct 2018 16:53:56 -0400 Subject: [PATCH 21/41] CLI option to update replica count on the fly --- noria-server/src/controller/builder.rs | 5 +++++ noria-server/src/controller/inner.rs | 2 ++ noria-server/src/controller/migrate/mod.rs | 9 +++----- noria-server/src/controller/mod.rs | 2 ++ noria-server/src/controller/sql/mod.rs | 24 ++++++++++------------ noria-server/src/integration.rs | 18 ++++++++-------- noria-server/src/main.rs | 9 ++++++++ 7 files changed, 42 insertions(+), 27 deletions(-) diff --git a/noria-server/src/controller/builder.rs b/noria-server/src/controller/builder.rs index 132d04023..fa291df51 100644 --- a/noria-server/src/controller/builder.rs +++ b/noria-server/src/controller/builder.rs @@ -57,6 +57,11 @@ impl ControllerBuilder { self.config.sharding = shards; } + /// Set number of reader replicas + pub fn set_replicas(&mut self, replicas: usize) { + self.config.replicas = replicas; + } + /// Set how many workers the controller should wait for before starting. More workers can join /// later, but they won't be assigned any of the initial domains. pub fn set_quorum(&mut self, quorum: usize) { diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index b586f6360..5ea13ff1c 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -36,6 +36,7 @@ pub struct ControllerInner { pub(super) source: NodeIndex, pub(super) ndomains: usize, pub(super) sharding: Option, + pub(super) replicas: usize, pub(super) domain_config: DomainConfig, @@ -433,6 +434,7 @@ impl ControllerInner { materializations, sharding: state.config.sharding, + replicas: state.config.replicas, domain_config: state.config.domain_config, persistence: state.config.persistence, heartbeat_every: state.config.heartbeat_every, diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index 50b2f14f1..f7a354877 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -41,9 +41,6 @@ pub(super) enum ColumnChange { Drop(usize), } -/// The number of replicas per reader node, including the original reader node. -pub const NUM_READER_REPLICAS: usize = 3; - /// A `Migration` encapsulates a number of changes to the Soup data flow graph. /// /// Only one `Migration` can be in effect at any point in time. No changes are made to the running @@ -236,7 +233,7 @@ impl<'a> Migration<'a> { let r = self.mainline.ingredients.add_node(r); self.mainline.ingredients.add_edge(n, r, ()); self.mainline.ingredients[n].add_replica(r); - debug!(self.log, + info!(self.log, "adding reader node"; "node" => r.index(), "for_node" => n.index(), @@ -254,7 +251,7 @@ impl<'a> Migration<'a> { /// To query into the maintained state, use `ControllerInner::get_getter`. #[cfg(test)] pub fn maintain_anonymous(&mut self, n: NodeIndex, key: &[usize]) -> Vec { - self.ensure_reader_for(n, None, NUM_READER_REPLICAS); + self.ensure_reader_for(n, None, self.mainline.replicas); let ris = &self.readers[&n]; for ri in ris { @@ -270,7 +267,7 @@ impl<'a> Migration<'a> { /// /// To query into the maintained state, use `ControllerInner::get_getter`. pub fn maintain(&mut self, name: String, n: NodeIndex, key: &[usize]) { - self.ensure_reader_for(n, Some(name), NUM_READER_REPLICAS); + self.ensure_reader_for(n, Some(name), self.mainline.replicas); let ris = &self.readers[&n]; diff --git a/noria-server/src/controller/mod.rs b/noria-server/src/controller/mod.rs index 04a6ff88a..c2463d0f1 100644 --- a/noria-server/src/controller/mod.rs +++ b/noria-server/src/controller/mod.rs @@ -104,6 +104,7 @@ where #[derive(Clone, Serialize, Deserialize, PartialEq)] pub(crate) struct ControllerConfig { pub sharding: Option, + pub replicas: usize, pub partial_enabled: bool, pub domain_config: DomainConfig, pub persistence: PersistenceParameters, @@ -120,6 +121,7 @@ impl Default for ControllerConfig { sharding: Some(2), #[cfg(not(test))] sharding: None, + replicas: 3, partial_enabled: true, domain_config: DomainConfig { concurrent_replays: 512, diff --git a/noria-server/src/controller/sql/mod.rs b/noria-server/src/controller/sql/mod.rs index 26cb78440..ebb15ad4e 100644 --- a/noria-server/src/controller/sql/mod.rs +++ b/noria-server/src/controller/sql/mod.rs @@ -942,8 +942,6 @@ impl<'a> ToFlowParts for &'a str { #[cfg(test)] mod tests { use super::{SqlIncorporator, ToFlowParts}; - // TODO(ygina): don't hardcode number of replicas - use crate::controller::migrate::NUM_READER_REPLICAS; use crate::controller::Migration; use crate::integration; use dataflow::prelude::*; @@ -1004,7 +1002,7 @@ mod tests { ); // Should now have source, "users", a leaf projection node for the new selection, and // a reader node - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // Invalid query should fail parsing and add no nodes assert!( @@ -1013,7 +1011,7 @@ mod tests { .is_err() ); // Should still only have source, "users" and the two nodes for the above selection - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); }); } @@ -1140,7 +1138,7 @@ mod tests { ); assert!(res.is_ok()); // added the aggregation and the edge view, and a reader - assert_eq!(mig.graph().node_count(), NUM_READER_REPLICAS + 4); + assert_eq!(mig.graph().node_count(), integration::DEFAULT_REPLICAS + 4); // check aggregation view let f = Box::new(FunctionExpression::Count( Column::from("votes.userid"), @@ -1189,7 +1187,7 @@ mod tests { let qfp = res.unwrap(); assert_eq!(qfp.new_nodes.len(), 2); // expect three new nodes: filter, project, reader - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 2); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 2); // should have ended up with a different leaf node assert_ne!(qfp.query_leaf, leaf); }); @@ -1234,7 +1232,7 @@ mod tests { assert!(res.is_ok()); // should have added two more nodes (project and reader) let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // should NOT have ended up with the same leaf node assert_ne!(qfp.query_leaf, leaf); }); @@ -1280,7 +1278,7 @@ mod tests { assert!(res.is_ok()); // should have added two more nodes: one identity node and one reader node let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // only the identity node is returned in the vector of new nodes assert_eq!(qfp.new_nodes.len(), 1); assert_eq!(get_node(&inc, mig, &qfp.name).description(true), "≡"); @@ -1299,7 +1297,7 @@ mod tests { assert!(res.is_ok()); // should have added two more nodes: one projection node and one reader node let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 1); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // only the projection node is returned in the vector of new nodes assert_eq!(qfp.new_nodes.len(), 1); assert_eq!( @@ -1359,7 +1357,7 @@ mod tests { assert!(res.is_ok()); // should have added three more nodes: a join, a projection, and a reader let qfp = res.unwrap(); - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 2); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 2); // only the join and projection nodes are returned in the vector of new nodes assert_eq!(qfp.new_nodes.len(), 2); }); @@ -1385,7 +1383,7 @@ mod tests { let res = inc.add_query("SELECT COUNT(votes.userid) AS count FROM votes;", None, mig); assert!(res.is_ok()); // added the aggregation, a project helper, the edge view, and reader - assert_eq!(mig.graph().node_count(), NUM_READER_REPLICAS + 5); + assert_eq!(mig.graph().node_count(), integration::DEFAULT_REPLICAS + 5); // check project helper node let f = Box::new(FunctionExpression::Count( Column::from("votes.userid"), @@ -1441,7 +1439,7 @@ mod tests { ); assert!(res.is_ok()); // added the aggregation, a project helper, the edge view, and reader - assert_eq!(mig.graph().node_count(), NUM_READER_REPLICAS + 4); + assert_eq!(mig.graph().node_count(), integration::DEFAULT_REPLICAS + 4); // check aggregation view let f = Box::new(FunctionExpression::Count(Column::from("votes.aid"), false)); let qid = query_id_hash( @@ -1807,7 +1805,7 @@ mod tests { // should NOT have ended up with the same leaf node assert_ne!(qfp.query_leaf, leaf); // should have added three more nodes (filter, project and reader) - assert_eq!(mig.graph().node_count(), ncount + NUM_READER_REPLICAS + 2); + assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 2); }); } diff --git a/noria-server/src/integration.rs b/noria-server/src/integration.rs index 489493344..15d084825 100644 --- a/noria-server/src/integration.rs +++ b/noria-server/src/integration.rs @@ -1,4 +1,3 @@ -use crate::controller::migrate::NUM_READER_REPLICAS; use crate::controller::recipe::Recipe; use crate::controller::sql::SqlIncorporator; use crate::controller::{ControllerBuilder, LocalControllerHandle}; @@ -20,6 +19,7 @@ use std::{env, thread}; const DEFAULT_SETTLE_TIME_MS: u64 = 200; const DEFAULT_SHARDING: Option = Some(2); +pub const DEFAULT_REPLICAS: usize = 3; // PersistenceParameters with a log_name on the form of `prefix` + timestamp, // avoiding collisions between separate test runs (in case an earlier panic causes clean-up to @@ -33,22 +33,23 @@ fn get_persistence_params(prefix: &str) -> PersistenceParameters { // Builds a local controller with the given log prefix. pub fn build_local(prefix: &str) -> LocalControllerHandle { - build(prefix, DEFAULT_SHARDING, false) + build(prefix, DEFAULT_SHARDING, DEFAULT_REPLICAS, false) } #[allow(dead_code)] pub fn build_local_unsharded(prefix: &str) -> LocalControllerHandle { - build(prefix, None, false) + build(prefix, None, DEFAULT_REPLICAS, false) } #[allow(dead_code)] pub fn build_local_logging(prefix: &str) -> LocalControllerHandle { - build(prefix, DEFAULT_SHARDING, true) + build(prefix, DEFAULT_SHARDING, DEFAULT_REPLICAS, true) } fn build( prefix: &str, sharding: Option, + replicas: usize, log: bool, ) -> LocalControllerHandle { use crate::logger_pls; @@ -58,6 +59,7 @@ fn build( } builder.set_sharding(sharding); builder.set_persistence(get_persistence_params(prefix)); + builder.set_replicas(replicas); builder.build_local().unwrap() } @@ -2044,7 +2046,7 @@ fn recipe_activates_and_migrates() { // still one base node assert_eq!(g.inputs().unwrap().len(), 1); // two leaf nodes - assert_eq!(g.outputs().unwrap().len(), 2 * NUM_READER_REPLICAS); + assert_eq!(g.outputs().unwrap().len(), 2 * DEFAULT_REPLICAS); } #[test] @@ -2064,7 +2066,7 @@ fn recipe_activates_and_migrates_with_join() { // still two base nodes assert_eq!(g.inputs().unwrap().len(), 2); // one leaf node - assert_eq!(g.outputs().unwrap().len(), 1 * NUM_READER_REPLICAS); + assert_eq!(g.outputs().unwrap().len(), 1 * DEFAULT_REPLICAS); } fn test_queries(test: &str, file: &'static str, shard: bool, reuse: bool, log: bool) { @@ -2263,7 +2265,7 @@ fn remove_query() { let mut g = ControllerBuilder::default().build_local().unwrap(); g.install_recipe(r_txt).unwrap(); assert_eq!(g.inputs().unwrap().len(), 1); - assert_eq!(g.outputs().unwrap().len(), 2 * NUM_READER_REPLICAS); + assert_eq!(g.outputs().unwrap().len(), 2 * DEFAULT_REPLICAS); let mut mutb = g.table("b").unwrap(); let mut qa = g.view("qa").unwrap(); @@ -2280,7 +2282,7 @@ fn remove_query() { // Remove qb and check that the graph still functions as expected. g.install_recipe(r2_txt).unwrap(); assert_eq!(g.inputs().unwrap().len(), 1); - assert_eq!(g.outputs().unwrap().len(), 1 * NUM_READER_REPLICAS); + assert_eq!(g.outputs().unwrap().len(), 1 * DEFAULT_REPLICAS); assert!(g.view("qb").is_err()); mutb.insert(vec![42.into(), "6".into(), "7".into()]) diff --git a/noria-server/src/main.rs b/noria-server/src/main.rs index 7debbef54..293b72a9c 100644 --- a/noria-server/src/main.rs +++ b/noria-server/src/main.rs @@ -104,6 +104,13 @@ fn main() { .default_value("0") .help("Shard the graph this many ways (0 = disable sharding)."), ) + .arg( + Arg::with_name("replicas") + .long("replicas") + .takes_value(true) + .default_value("3") + .help("Each reader has this many replicas (at least 1)."), + ) .arg( Arg::with_name("verbose") .short("v") @@ -127,6 +134,7 @@ fn main() { 0 => None, x => Some(x), }; + let replicas = value_t_or_exit!(matches, "replicas", usize); let verbose = matches.is_present("verbose"); let deployment_name = matches.value_of("deployment").unwrap(); @@ -138,6 +146,7 @@ fn main() { builder.set_memory_limit(memory, Duration::from_millis(memory_check_freq)); } builder.set_sharding(sharding); + builder.set_replicas(replicas); builder.set_quorum(quorum); if matches.is_present("nopartial") { builder.disable_partial(); From 576346f6f7075a8c4e7a2b37c28e56bcb5c2fc55 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Wed, 17 Oct 2018 17:26:12 -0400 Subject: [PATCH 22/41] Logging for view builders --- noria-server/src/controller/inner.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 5ea13ff1c..74d88196d 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -744,16 +744,23 @@ impl ControllerInner { // it may not be an *immediate* child. furthermore, once we go beyond depth 1, we may // accidentally hit an *unrelated* reader node. to account for this, nodes cache their // readers so we can easily query data. - self.ingredients[node].next_replica().map(|ri| { - let domain = self.ingredients[ri].domain(); - let columns = self.ingredients[ri].fields().to_vec(); + self.ingredients[node].next_replica().map(|ni| { + info!( + self.log, + "creating view builder"; + "name" => name, + "node_index" => ni.index(), + "replica_index" => self.ingredients[ni].replica_index().unwrap(), + ); + let domain = self.ingredients[ni].domain(); + let columns = self.ingredients[ni].fields().to_vec(); let shards = (0..self.domains[&domain].shards()) .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) .collect(); ViewBuilder { local_ports: vec![], - node: ri, + node: ni, columns, shards, } From 10c488577340e72f9c8e01fb644a6f4045f14742 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Thu, 18 Oct 2018 15:23:23 -0400 Subject: [PATCH 23/41] Actually assign replicas to different workers --- noria-server/src/controller/inner.rs | 15 ++----------- noria-server/src/controller/migrate/mod.rs | 26 +++++++++++++++++++++- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 74d88196d..21a5f9ec0 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -498,6 +498,7 @@ impl ControllerInner { pub(crate) fn place_domain( &mut self, idx: DomainIndex, + identifier: WorkerIdentifier, num_shards: Option, log: &Logger, nodes: Vec<(NodeIndex, bool)>, @@ -515,9 +516,6 @@ impl ControllerInner { .collect(), ); - // TODO(malte): simple round-robin placement for the moment - let mut wi = self.workers.iter_mut(); - // Send `AssignDomain` to each shard of the given domain for i in 0..num_shards.unwrap_or(1) { let nodes = if i == num_shards.unwrap_or(1) - 1 { @@ -535,17 +533,8 @@ impl ControllerInner { persistence_parameters: self.persistence.clone(), }; - let (identifier, w) = loop { - if let Some((i, w)) = wi.next() { - if w.healthy { - break (*i, w); - } - } else { - wi = self.workers.iter_mut(); - } - }; - // send domain to worker + let w = self.workers.get_mut(&identifier).unwrap(); info!( log, "sending domain {}.{} to worker {:?}", diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index f7a354877..4db1adfcd 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -20,7 +20,7 @@ //! //! Beware, Here be dragons™ -use crate::controller::ControllerInner; +use crate::controller::{ControllerInner, WorkerIdentifier}; use dataflow::prelude::*; use dataflow::{node, payload}; use std::collections::{HashMap, HashSet}; @@ -359,15 +359,39 @@ impl<'a> Migration<'a> { log: &slog::Logger, uninformed_domain_nodes: &mut HashMap>, changed_domains: &Vec) { + // TODO(malte): simple round-robin placement for the moment + let mut wis = mainline.workers + .keys() + .map(|wi| wi.clone()) + .collect::>() + .into_iter(); for domain in changed_domains { if mainline.domains.contains_key(&domain) { // this is not a new domain continue; } + // find a worker identifier + let wi = loop { + if let Some(wi) = wis.next() { + let w = mainline.workers.get(&wi).unwrap(); + if w.healthy { + break wi; + } + } else { + wis = mainline.workers + .keys() + .map(|wi| wi.clone()) + .collect::>() + .into_iter(); + } + }; + + // place the domain on the worker let nodes = uninformed_domain_nodes.remove(&domain).unwrap(); let d = mainline.place_domain( *domain, + wi, mainline.ingredients[nodes[0].0].sharded_by().shards(), &log, nodes, From 2ef42fb56718ac8b21e75304bdda8760c281839d Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Thu, 18 Oct 2018 17:57:18 -0400 Subject: [PATCH 24/41] Shards should be on separate workers again --- noria-server/src/controller/inner.rs | 6 ++-- noria-server/src/controller/migrate/mod.rs | 39 ++++++++++++---------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 21a5f9ec0..7c8cdd7f8 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -498,7 +498,7 @@ impl ControllerInner { pub(crate) fn place_domain( &mut self, idx: DomainIndex, - identifier: WorkerIdentifier, + identifiers: Vec, num_shards: Option, log: &Logger, nodes: Vec<(NodeIndex, bool)>, @@ -534,6 +534,8 @@ impl ControllerInner { }; // send domain to worker + let identifier = identifiers.get(i) + .expect("number of identifiers should match number of shards"); let w = self.workers.get_mut(&identifier).unwrap(); info!( log, @@ -609,7 +611,7 @@ impl ControllerInner { .enumerate() .map(|(i, worker)| { let tx = txs.remove(&i).unwrap(); - DomainShardHandle { worker, tx } + DomainShardHandle { worker: *worker, tx } }) .collect(); diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index 4db1adfcd..028b785c1 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -371,28 +371,33 @@ impl<'a> Migration<'a> { continue; } - // find a worker identifier - let wi = loop { - if let Some(wi) = wis.next() { - let w = mainline.workers.get(&wi).unwrap(); - if w.healthy { - break wi; + // find a worker identifier for each shard + let nodes = uninformed_domain_nodes.remove(&domain).unwrap(); + let num_shards = mainline.ingredients[nodes[0].0].sharded_by().shards(); + let mut identifiers = Vec::new(); + for _ in 0..num_shards.unwrap_or(1) { + let wi = loop { + if let Some(wi) = wis.next() { + let w = mainline.workers.get(&wi).unwrap(); + if w.healthy { + break wi; + } + } else { + wis = mainline.workers + .keys() + .map(|wi| wi.clone()) + .collect::>() + .into_iter(); } - } else { - wis = mainline.workers - .keys() - .map(|wi| wi.clone()) - .collect::>() - .into_iter(); - } - }; + }; + identifiers.push(wi); + } // place the domain on the worker - let nodes = uninformed_domain_nodes.remove(&domain).unwrap(); let d = mainline.place_domain( *domain, - wi, - mainline.ingredients[nodes[0].0].sharded_by().shards(), + identifiers, + num_shards, &log, nodes, ); From b69c3c05910522e937e1e3d9dd7d3d3ac49a58c5 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 23 Oct 2018 11:52:50 -0400 Subject: [PATCH 25/41] CLI option to set number of pool threads --- noria-server/src/main.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/noria-server/src/main.rs b/noria-server/src/main.rs index 293b72a9c..bb9d64b96 100644 --- a/noria-server/src/main.rs +++ b/noria-server/src/main.rs @@ -104,6 +104,13 @@ fn main() { .default_value("0") .help("Shard the graph this many ways (0 = disable sharding)."), ) + .arg( + Arg::with_name("threads") + .long("threads") + .takes_value(true) + .default_value("0") + .help("Number of pool threads to use (0 = #cores)"), + ) .arg( Arg::with_name("replicas") .long("replicas") @@ -134,6 +141,10 @@ fn main() { 0 => None, x => Some(x), }; + let threads = match value_t_or_exit!(matches, "threads", usize) { + 0 => None, + x => Some(x), + }; let replicas = value_t_or_exit!(matches, "replicas", usize); let verbose = matches.is_present("verbose"); let deployment_name = matches.value_of("deployment").unwrap(); @@ -146,6 +157,9 @@ fn main() { builder.set_memory_limit(memory, Duration::from_millis(memory_check_freq)); } builder.set_sharding(sharding); + if let Some(threads) = threads { + builder.set_threads(threads); + } builder.set_replicas(replicas); builder.set_quorum(quorum); if matches.is_present("nopartial") { From 0c2bf3e4d98268e7fd34a34527bb2cbf772af95a Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Fri, 2 Nov 2018 14:50:16 -0400 Subject: [PATCH 26/41] Revert unnecessary refactor to controller view --- noria/src/controller.rs | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/noria/src/controller.rs b/noria/src/controller.rs index d0807c68c..02281953e 100644 --- a/noria/src/controller.rs +++ b/noria/src/controller.rs @@ -210,7 +210,7 @@ impl ControllerHandle { } /// Obtain a `View` that allows you to query the given external view. If the view has - /// replicas, returns a `View` in round robin order each time the api is called. + /// replicas, returns a new `View` replica in round robin order each time the api is called. pub fn view(&mut self, name: &str) -> Result { // This call attempts to detect if this function is being called in a loop. If this is // getting false positives, then it is safe to increase the allowed hit count, however, the @@ -218,20 +218,19 @@ impl ControllerHandle { #[cfg(debug_assertions)] assert_infrequent::at_most(200); - let mut g = self.rpc::<_, ViewBuilder>("view_builder", name) - .context(format!("building View for {}", name))?; - - if let Some(port) = self.local_port { - g = g.with_local_port(port); - } - - let g = g.build(&mut self.views)?; - - if self.local_port.is_none() { - self.local_port = Some(g.local_addr().unwrap().port()); - } - - Ok(g) + self.rpc::<_, Option>("view_builder", name) + .context(format!("building View for {}", name))? + .ok_or_else(|| format_err!("view {} does not exist", name)) + .and_then(|mut g| { + if let Some(port) = self.local_port { + g = g.with_local_port(port); + } + let g = g.build(&mut self.views)?; + if self.local_port.is_none() { + self.local_port = Some(g.local_addr().unwrap().port()); + } + Ok(g) + }) } /// Obtain a `Table` that allows you to perform writes, deletes, and other operations on the From 57d88047bcb3251093210aa1575db320ab7b6c48 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Fri, 2 Nov 2018 14:59:51 -0400 Subject: [PATCH 27/41] Set default replicas to 1 when not a test, otherwise 3 --- noria-server/src/controller/mod.rs | 3 +++ noria-server/src/main.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/noria-server/src/controller/mod.rs b/noria-server/src/controller/mod.rs index c2463d0f1..1c86c5990 100644 --- a/noria-server/src/controller/mod.rs +++ b/noria-server/src/controller/mod.rs @@ -121,7 +121,10 @@ impl Default for ControllerConfig { sharding: Some(2), #[cfg(not(test))] sharding: None, + #[cfg(test)] replicas: 3, + #[cfg(not(test))] + replicas: 1, partial_enabled: true, domain_config: DomainConfig { concurrent_replays: 512, diff --git a/noria-server/src/main.rs b/noria-server/src/main.rs index bb9d64b96..a385ac542 100644 --- a/noria-server/src/main.rs +++ b/noria-server/src/main.rs @@ -115,7 +115,7 @@ fn main() { Arg::with_name("replicas") .long("replicas") .takes_value(true) - .default_value("3") + .default_value("1") .help("Each reader has this many replicas (at least 1)."), ) .arg( From 692ce0a8d5cbf2b685dbac7fce5c54ccfeb71582 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Fri, 2 Nov 2018 15:41:08 -0400 Subject: [PATCH 28/41] Edit comments in existing tests to reflect counting replicas --- noria-server/src/controller/sql/mod.rs | 22 +++++++++++----------- noria-server/src/integration.rs | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/noria-server/src/controller/sql/mod.rs b/noria-server/src/controller/sql/mod.rs index ebb15ad4e..93b5b986d 100644 --- a/noria-server/src/controller/sql/mod.rs +++ b/noria-server/src/controller/sql/mod.rs @@ -1001,7 +1001,7 @@ mod tests { .is_ok() ); // Should now have source, "users", a leaf projection node for the new selection, and - // a reader node + // reader replicas assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // Invalid query should fail parsing and add no nodes @@ -1010,7 +1010,7 @@ mod tests { .to_flow_parts(&mut inc, None, mig) .is_err() ); - // Should still only have source, "users" and the two nodes for the above selection + // Should still only have source, "users" and the nodes for the above selection assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); }); } @@ -1137,7 +1137,7 @@ mod tests { mig, ); assert!(res.is_ok()); - // added the aggregation and the edge view, and a reader + // added the aggregation and the edge view, and reader replicas assert_eq!(mig.graph().node_count(), integration::DEFAULT_REPLICAS + 4); // check aggregation view let f = Box::new(FunctionExpression::Count( @@ -1186,7 +1186,7 @@ mod tests { // should have added nodes for this query, too let qfp = res.unwrap(); assert_eq!(qfp.new_nodes.len(), 2); - // expect three new nodes: filter, project, reader + // expect 3+ new nodes: filter, project, reader replicas assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 2); // should have ended up with a different leaf node assert_ne!(qfp.query_leaf, leaf); @@ -1230,7 +1230,7 @@ mod tests { let ncount = mig.graph().node_count(); let res = inc.add_query("SELECT name, id FROM users WHERE users.id = 42;", None, mig); assert!(res.is_ok()); - // should have added two more nodes (project and reader) + // should have added 2+ more nodes (project and reader replicas) let qfp = res.unwrap(); assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // should NOT have ended up with the same leaf node @@ -1276,7 +1276,7 @@ mod tests { mig, ); assert!(res.is_ok()); - // should have added two more nodes: one identity node and one reader node + // should have added 2+ more nodes: one identity node and reader replicas let qfp = res.unwrap(); assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // only the identity node is returned in the vector of new nodes @@ -1295,7 +1295,7 @@ mod tests { mig, ); assert!(res.is_ok()); - // should have added two more nodes: one projection node and one reader node + // should have added 2+ more nodes: one projection node and reader replicas let qfp = res.unwrap(); assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 1); // only the projection node is returned in the vector of new nodes @@ -1355,7 +1355,7 @@ mod tests { mig, ); assert!(res.is_ok()); - // should have added three more nodes: a join, a projection, and a reader + // should have added 3+ more nodes: a join, a projection, and reader replicas let qfp = res.unwrap(); assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 2); // only the join and projection nodes are returned in the vector of new nodes @@ -1382,7 +1382,7 @@ mod tests { // Try a simple COUNT function without a GROUP BY clause let res = inc.add_query("SELECT COUNT(votes.userid) AS count FROM votes;", None, mig); assert!(res.is_ok()); - // added the aggregation, a project helper, the edge view, and reader + // added the aggregation, a project helper, the edge view, and reader replicas assert_eq!(mig.graph().node_count(), integration::DEFAULT_REPLICAS + 5); // check project helper node let f = Box::new(FunctionExpression::Count( @@ -1438,7 +1438,7 @@ mod tests { mig, ); assert!(res.is_ok()); - // added the aggregation, a project helper, the edge view, and reader + // added the aggregation, a project helper, the edge view, and reader replicas assert_eq!(mig.graph().node_count(), integration::DEFAULT_REPLICAS + 4); // check aggregation view let f = Box::new(FunctionExpression::Count(Column::from("votes.aid"), false)); @@ -1804,7 +1804,7 @@ mod tests { let qfp = res.unwrap(); // should NOT have ended up with the same leaf node assert_ne!(qfp.query_leaf, leaf); - // should have added three more nodes (filter, project and reader) + // should have added 3+ more nodes (filter, project and reader replicas) assert_eq!(mig.graph().node_count(), ncount + integration::DEFAULT_REPLICAS + 2); }); } diff --git a/noria-server/src/integration.rs b/noria-server/src/integration.rs index 15d084825..182ed2e6b 100644 --- a/noria-server/src/integration.rs +++ b/noria-server/src/integration.rs @@ -2045,7 +2045,7 @@ fn recipe_activates_and_migrates() { g.extend_recipe(r1_txt).unwrap(); // still one base node assert_eq!(g.inputs().unwrap().len(), 1); - // two leaf nodes + // two leaf nodes * number of replicas assert_eq!(g.outputs().unwrap().len(), 2 * DEFAULT_REPLICAS); } @@ -2065,7 +2065,7 @@ fn recipe_activates_and_migrates_with_join() { // still two base nodes assert_eq!(g.inputs().unwrap().len(), 2); - // one leaf node + // one leaf node * number of replicas assert_eq!(g.outputs().unwrap().len(), 1 * DEFAULT_REPLICAS); } From eeecc1204372d2afad3d03a21e6aac57d51b7ea2 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Fri, 2 Nov 2018 16:04:58 -0400 Subject: [PATCH 29/41] Propagate reader node name to view, integration test for replica writes --- noria-server/src/controller/inner.rs | 14 ++++++----- noria-server/src/integration.rs | 36 ++++++++++++++++++++++++++++ noria/src/view.rs | 11 +++++++++ 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 7c8cdd7f8..4af705a06 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -736,20 +736,22 @@ impl ControllerInner { // accidentally hit an *unrelated* reader node. to account for this, nodes cache their // readers so we can easily query data. self.ingredients[node].next_replica().map(|ni| { + let rname = self.ingredients[ni].name(); + let domain = self.ingredients[ni].domain(); + let columns = self.ingredients[ni].fields().to_vec(); + let shards = (0..self.domains[&domain].shards()) + .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) + .collect(); info!( self.log, "creating view builder"; - "name" => name, + "name" => rname, "node_index" => ni.index(), "replica_index" => self.ingredients[ni].replica_index().unwrap(), ); - let domain = self.ingredients[ni].domain(); - let columns = self.ingredients[ni].fields().to_vec(); - let shards = (0..self.domains[&domain].shards()) - .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) - .collect(); ViewBuilder { + name: rname.to_string(), local_ports: vec![], node: ni, columns, diff --git a/noria-server/src/integration.rs b/noria-server/src/integration.rs index 182ed2e6b..ba8f67bba 100644 --- a/noria-server/src/integration.rs +++ b/noria-server/src/integration.rs @@ -2292,3 +2292,39 @@ fn remove_query() { assert_eq!(qa.lookup(&[0.into()], true).unwrap().len(), 3); assert_eq!(qb.lookup(&[0.into()], true).unwrap().len(), 1); } + +#[test] +fn replica_writes() { + let txt = "CREATE TABLE x (a int);\n + QUERY q: SELECT a from x;\n"; + + let mut g = ControllerBuilder::default().build_local().unwrap(); + g.install_recipe(txt).unwrap(); + assert_eq!(g.inputs().unwrap().len(), 1); + assert_eq!(g.outputs().unwrap().len(), DEFAULT_REPLICAS); + assert!(DEFAULT_REPLICAS > 1); + + let mut mutx = g.table("x").unwrap(); + let mut q1 = g.view("q").unwrap().into_exclusive().unwrap(); + let mut q2 = g.view("q").unwrap().into_exclusive().unwrap(); + let mut q3 = g.view("q").unwrap().into_exclusive().unwrap(); + + // These are actually views to different replicas + assert_eq!(q1.name(), "q_r1"); + assert_eq!(q2.name(), "q_r2"); + assert_eq!(q3.name(), "q"); + + assert_eq!(q1.lookup(&[0.into()], true).unwrap().len(), 0); + assert_eq!(q2.lookup(&[0.into()], true).unwrap().len(), 0); + assert_eq!(q3.lookup(&[0.into()], true).unwrap().len(), 0); + + mutx.insert(vec![13.into()]).unwrap(); + mutx.insert(vec![21.into()]).unwrap(); + mutx.insert(vec![34.into()]).unwrap(); + sleep(); + + // Writes are reflected in all replicas + assert_eq!(q1.lookup(&[0.into()], true).unwrap().len(), 3); + assert_eq!(q2.lookup(&[0.into()], true).unwrap().len(), 3); + assert_eq!(q3.lookup(&[0.into()], true).unwrap().len(), 3); +} diff --git a/noria/src/view.rs b/noria/src/view.rs index d4dc0db37..27ca1bcf2 100644 --- a/noria/src/view.rs +++ b/noria/src/view.rs @@ -59,6 +59,7 @@ pub enum ReadReply { #[doc(hidden)] #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ViewBuilder { + pub name: String, pub node: NodeIndex, pub columns: Vec, pub shards: Vec, @@ -76,6 +77,7 @@ impl ViewBuilder { .collect::>>()?; Ok(View { + name: self.name, node: self.node, columns: self.columns, shard_addrs: self.shards, @@ -124,6 +126,7 @@ impl ViewBuilder { .collect::>>()?; Ok(View { + name: self.name, node: self.node, columns: self.columns, shard_addrs: self.shards, @@ -140,6 +143,7 @@ impl ViewBuilder { /// get a handle that can be sent to a different thread (i.e., one with its own dedicated /// connections), call `View::into_exclusive`. pub struct View { + name: String, node: NodeIndex, columns: Vec, shards: Vec, @@ -152,6 +156,7 @@ pub struct View { impl Clone for View { fn clone(&self) -> Self { View { + name: self.name.clone(), node: self.node, columns: self.columns.clone(), shards: self.shards.clone(), @@ -168,6 +173,7 @@ impl View { /// threads. pub fn into_exclusive(self) -> io::Result> { ViewBuilder { + name: self.name, node: self.node, local_ports: vec![], columns: self.columns, @@ -182,6 +188,11 @@ impl View { allow(clippy::len_without_is_empty) )] impl View { + /// Get the name of the corresponding reader node. + pub fn name(&self) -> String { + self.name.clone() + } + /// Get the list of columns in this view. pub fn columns(&self) -> &[String] { self.columns.as_slice() From b9b7325b90fdd20811ddd9e6295f6d5d12f5e65d Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sun, 11 Nov 2018 09:37:13 -0500 Subject: [PATCH 30/41] Documented place_domain, some minor Rust and whitespace things --- noria-server/src/controller/inner.rs | 9 +++++---- noria-server/src/controller/migrate/mod.rs | 5 ++--- noria/src/controller.rs | 3 +++ noria/src/view.rs | 4 ++-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 4af705a06..23133d6fc 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -495,6 +495,11 @@ impl ControllerInner { self.persistence = params; } + // Assigns nodes to this domain, and shards the domain across multiple workers. + // + // Each worker identifier in `identifiers` corresponds to a single shard in `num_shards`, + // thus the logic of which worker gets which shard is determined by the code that calls + // this method. That code must also ensure the workers are healthy. pub(crate) fn place_domain( &mut self, idx: DomainIndex, @@ -731,10 +736,6 @@ impl ControllerInner { } }; - // reader should be a child of the given node. however, due to sharding and replication, - // it may not be an *immediate* child. furthermore, once we go beyond depth 1, we may - // accidentally hit an *unrelated* reader node. to account for this, nodes cache their - // readers so we can easily query data. self.ingredients[node].next_replica().map(|ni| { let rname = self.ingredients[ni].name(); let domain = self.ingredients[ni].domain(); diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index 028b785c1..d5a852e64 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -207,7 +207,7 @@ impl<'a> Migration<'a> { fn ensure_reader_for(&mut self, n: NodeIndex, name: Option, num_replicas: usize) { if !self.readers.contains_key(&n) { - let mut readers = vec![]; + let mut readers = Vec::with_capacity(num_replicas); for i in 0..num_replicas { let replica_i = { if num_replicas == 0 { @@ -548,8 +548,7 @@ impl<'a> Migration<'a> { for &&ni in sorted_new .iter() .filter(|&&&ni| ni != mainline.source) - .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) - .into_iter() { + .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) { // Check if the node is a reader, and then insert it in the replica map. The value // is a list of readers that are for the same node, and the key is the node the // readers are for. Later, we will also add the associated ingress node to the replica diff --git a/noria/src/controller.rs b/noria/src/controller.rs index 02281953e..8117bf3b8 100644 --- a/noria/src/controller.rs +++ b/noria/src/controller.rs @@ -225,10 +225,13 @@ impl ControllerHandle { if let Some(port) = self.local_port { g = g.with_local_port(port); } + let g = g.build(&mut self.views)?; + if self.local_port.is_none() { self.local_port = Some(g.local_addr().unwrap().port()); } + Ok(g) }) } diff --git a/noria/src/view.rs b/noria/src/view.rs index 27ca1bcf2..068e44950 100644 --- a/noria/src/view.rs +++ b/noria/src/view.rs @@ -189,8 +189,8 @@ impl View { )] impl View { /// Get the name of the corresponding reader node. - pub fn name(&self) -> String { - self.name.clone() + pub fn name(&self) -> &str { + &self.name } /// Get the list of columns in this view. From f8749fdd3ef5776697b9929e69c2b05a4774890f Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sun, 11 Nov 2018 10:28:53 -0500 Subject: [PATCH 31/41] Small string fix --- noria-server/src/controller/inner.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 23133d6fc..1fa61c826 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -737,7 +737,7 @@ impl ControllerInner { }; self.ingredients[node].next_replica().map(|ni| { - let rname = self.ingredients[ni].name(); + let name = self.ingredients[ni].name().to_string(); let domain = self.ingredients[ni].domain(); let columns = self.ingredients[ni].fields().to_vec(); let shards = (0..self.domains[&domain].shards()) @@ -746,13 +746,13 @@ impl ControllerInner { info!( self.log, "creating view builder"; - "name" => rname, + "name" => &name, "node_index" => ni.index(), "replica_index" => self.ingredients[ni].replica_index().unwrap(), ); ViewBuilder { - name: rname.to_string(), + name, local_ports: vec![], node: ni, columns, From 0ad9f622820878d8167ae997c68224afa79ce733 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sun, 11 Nov 2018 20:08:53 -0500 Subject: [PATCH 32/41] Simplify commit(), no logical changes --- noria-server/src/controller/migrate/mod.rs | 76 ++++++++-------------- 1 file changed, 27 insertions(+), 49 deletions(-) diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index d5a852e64..134baea54 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -281,9 +281,17 @@ impl<'a> Migration<'a> { fn assign_local_addresses( mainline: &'a mut ControllerInner, log: &slog::Logger, - domain_new_nodes: &mut HashMap>, + sorted_new: &Vec<&NodeIndex>, swapped: &HashMap<(NodeIndex, NodeIndex), NodeIndex>) { - for (domain, nodes) in &mut domain_new_nodes.iter() { + let domain_new_nodes = sorted_new + .iter() + .map(|&&ni| (mainline.ingredients[ni].domain(), ni)) + .fold(HashMap::new(), |mut dns, (d, ni)| { + dns.entry(d).or_insert_with(Vec::new).push(ni); + dns + }); + + for (domain, nodes) in domain_new_nodes.iter() { // Number of pre-existing nodes let mut nnodes = mainline.remap.get(domain).map(HashMap::len).unwrap_or(0); @@ -419,9 +427,9 @@ impl<'a> Migration<'a> { let mut new: HashSet<_> = self.added.into_iter().collect(); // Readers are nodes too. - for (_parent, readers) in self.readers { + for (_parent, readers) in &self.readers { for reader in readers { - new.insert(reader); + new.insert(*reader); } } @@ -494,19 +502,14 @@ impl<'a> Migration<'a> { let swapped = swapped0; // Assign local addresses to all new nodes, and initialize them. - let mut sorted_new = new.iter().collect::>(); + let mut sorted_new = new + .iter() + .filter(|&&ni| ni != mainline.source) + .filter(|&&ni| !mainline.ingredients[ni].is_dropped()) + .collect::>(); sorted_new.sort(); - let mut domain_new_nodes = sorted_new - .iter() - .filter(|&&&ni| ni != mainline.source) - .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) - .map(|&&ni| (mainline.ingredients[ni].domain(), ni)) - .fold(HashMap::new(), |mut dns, (d, ni)| { - dns.entry(d).or_insert_with(Vec::new).push(ni); - dns - }); - Self::assign_local_addresses(&mut mainline, &log, &mut domain_new_nodes, &swapped); + Self::assign_local_addresses(&mut mainline, &log, &sorted_new, &swapped); if let Some(shards) = mainline.sharding { sharding::validate(&log, &mainline.ingredients, mainline.source, &new, shards) }; @@ -542,45 +545,20 @@ impl<'a> Migration<'a> { dns }); - // Bucket reader replica nodes and the non-replica nodes separately. - let mut replica_map = HashMap::new(); - let mut other_nodes = HashSet::new(); - for &&ni in sorted_new - .iter() - .filter(|&&&ni| ni != mainline.source) - .filter(|&&&ni| !mainline.ingredients[ni].is_dropped()) { - // Check if the node is a reader, and then insert it in the replica map. The value - // is a list of readers that are for the same node, and the key is the node the - // readers are for. Later, we will also add the associated ingress node to the replica - // map, since readers are all located on their own domains. - if let Some(for_node) = mainline.ingredients[ni] - .with_reader(|r| Some(r.is_for())) - .unwrap_or(None) { - if mainline.ingredients[ni].replica_index().is_some() { - replica_map.entry(for_node).or_insert(HashSet::new()); - replica_map.get_mut(&for_node).unwrap().insert(ni); - } - } else { - other_nodes.insert(ni); - } - } - - // Obtain a vec of DomainIndexes in the order that we want to assign them to workers. - // For replicas, all replicas for the same reader should be consecutive. For non-replicas, - // it doesn't really matter unless we want the ordering to be deterministic. + // Since we're using a round robin iterator, obtain a vec of DomainIndexes in the order + // that we want to assign them to workers. For replicas, all replicas for the same view + // should end up on different workers. For non-replicas, it doesn't really matter. let mut changed_domains_replicas = Vec::new(); - for (_, readers) in &mut replica_map { - let mut domains = readers + for (_, readers) in &self.readers { + changed_domains_replicas.extend( + readers .iter() - .filter(|&ni| !mainline.ingredients[*ni].is_dropped()) .map(|&ni| mainline.ingredients[ni].domain()) - .collect::>(); - changed_domains_replicas.append(&mut domains); + ); } - let changed_domains_other = other_nodes + let changed_domains_other = sorted_new .iter() - .filter(|&ni| !mainline.ingredients[*ni].is_dropped()) - .map(|&ni| mainline.ingredients[ni].domain()) + .map(|&&ni| mainline.ingredients[ni].domain()) .collect::>() .into_iter() .filter(|&domain| !changed_domains_replicas.contains(&domain)) From b2bef11a5cd085051e319db25e3f22dc2ccf7478 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sun, 11 Nov 2018 22:02:06 -0500 Subject: [PATCH 33/41] Name all readers with replica index in the suffix --- noria-server/src/controller/inner.rs | 15 +++++++++++---- noria-server/src/controller/migrate/mod.rs | 6 +----- noria-server/src/integration.rs | 6 +++--- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 1fa61c826..43d2e6053 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -720,10 +720,10 @@ impl ControllerInner { } /// Obtain a `ViewBuilder` that can be sent to a client and then used to query a given - /// (already maintained) reader node. If the view has replicas, a `ViewBuilder` is returned - /// for each view in round robin order. + /// (already maintained) reader node. If the reader has multiple replicas, a `ViewBuilder` + /// is returned for each replica in round robin order. /// - /// The name of the reader node is `name`, or if it is a replica, `name_r[REPLICA_NUMBER]`. + /// `name` is the name of the query. pub fn view_builder(&mut self, name: &str) -> Option { // first try to resolve the node via the recipe, which handles aliasing between identical // queries. @@ -732,7 +732,14 @@ impl ControllerInner { Err(_) => { // if the recipe doesn't know about this query, traverse the graph. // we need this do deal with manually constructed graphs (e.g., in tests). - *self.outputs().get(name)? + if let Some(ni) = self.outputs().get(name) { + *ni + } else { + // depending on how the graph was constructed, the outputs may be suffixed + // with the replica index. + let reader_name = format!("{}_0", name); + *self.outputs().get(&reader_name)? + } } }; diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index 134baea54..8139f4bca 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -220,11 +220,7 @@ impl<'a> Migration<'a> { // Make a reader node let r = node::special::Reader::new(n, replica_i); let r = if let Some(name) = name.clone() { - if i == 0 { - self.mainline.ingredients[n].named_mirror(r, name) - } else { - self.mainline.ingredients[n].named_mirror(r, format!("{}_r{}", name, i)) - } + self.mainline.ingredients[n].named_mirror(r, format!("{}_{}", name, i)) } else { self.mainline.ingredients[n].mirror(r) }; diff --git a/noria-server/src/integration.rs b/noria-server/src/integration.rs index ba8f67bba..a0cd46a68 100644 --- a/noria-server/src/integration.rs +++ b/noria-server/src/integration.rs @@ -2310,9 +2310,9 @@ fn replica_writes() { let mut q3 = g.view("q").unwrap().into_exclusive().unwrap(); // These are actually views to different replicas - assert_eq!(q1.name(), "q_r1"); - assert_eq!(q2.name(), "q_r2"); - assert_eq!(q3.name(), "q"); + assert_eq!(q1.name(), "q_1"); + assert_eq!(q2.name(), "q_2"); + assert_eq!(q3.name(), "q_0"); assert_eq!(q1.lookup(&[0.into()], true).unwrap().len(), 0); assert_eq!(q2.lookup(&[0.into()], true).unwrap().len(), 0); From 94c32257b88312a4cac2064c26eb0b378a49e55d Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Sun, 11 Nov 2018 23:06:10 -0500 Subject: [PATCH 34/41] Clarify language regarding the reader replication factor --- noria-server/dataflow/src/node/mod.rs | 42 +++++++++---------- .../dataflow/src/node/special/reader.rs | 14 +++---- noria-server/src/controller/builder.rs | 2 +- noria-server/src/controller/inner.rs | 23 +++++----- .../src/controller/migrate/assignment.rs | 4 +- noria-server/src/controller/migrate/mod.rs | 42 ++++++++----------- noria-server/src/integration.rs | 10 ++--- noria-server/src/main.rs | 2 +- noria/src/controller.rs | 3 +- 9 files changed, 66 insertions(+), 76 deletions(-) diff --git a/noria-server/dataflow/src/node/mod.rs b/noria-server/dataflow/src/node/mod.rs index 4ef056687..da5d01447 100644 --- a/noria-server/dataflow/src/node/mod.rs +++ b/noria-server/dataflow/src/node/mod.rs @@ -28,8 +28,8 @@ pub struct Node { taken: bool, sharded_by: Sharding, - replica_index: usize, - replicas: Vec, + next_reader: usize, + readers: Vec, } // constructors @@ -52,8 +52,8 @@ impl Node { taken: false, sharded_by: Sharding::None, - replica_index: 0, - replicas: Vec::new(), + next_reader: 0, + readers: Vec::new(), } } @@ -110,9 +110,9 @@ impl Node { self.sharded_by = s; } - /// Add a reader replica to this node. - pub fn add_replica(&mut self, ni: NodeIndex) { - self.replicas.push(ni) + /// The node being added is a reader for this node. + pub fn add_reader(&mut self, ni: NodeIndex) { + self.readers.push(ni) } pub fn on_commit(&mut self, remap: &HashMap) { @@ -342,32 +342,28 @@ impl Node { // reader replication impl Node { - pub fn has_replicas(&self) -> bool { - !self.replicas.is_empty() + pub fn has_readers(&self) -> bool { + !self.readers.is_empty() } - pub fn num_replicas(&self) -> usize { - self.replicas.len() + pub fn num_readers(&self) -> usize { + self.readers.len() } - pub fn get_replicas(&self) -> &[NodeIndex] { - &self.replicas[..] + pub fn get_readers(&self) -> &[NodeIndex] { + &self.readers[..] } - /// Returns replicas in round robin order each time the method is called. - pub fn next_replica(&mut self) -> Option { - if self.num_replicas() > 0 { - self.replica_index += 1; - self.replica_index %= self.num_replicas(); - Some(*self.replicas.get(self.replica_index).unwrap()) + /// Returns reader replicas in round robin order each time the method is called. + pub fn next_reader(&mut self) -> Option { + if self.num_readers() > 0 { + self.next_reader += 1; + self.next_reader %= self.num_readers(); + Some(*self.readers.get(self.next_reader).unwrap()) } else { None } } - - pub fn replica_index(&self) -> Option { - self.with_reader(|r| r.replica_index()).unwrap_or(None) - } } // is this or that? diff --git a/noria-server/dataflow/src/node/special/reader.rs b/noria-server/dataflow/src/node/special/reader.rs index 42b2748c6..3ebc2b923 100644 --- a/noria-server/dataflow/src/node/special/reader.rs +++ b/noria-server/dataflow/src/node/special/reader.rs @@ -35,7 +35,7 @@ pub struct Reader { streamers: Vec>>, for_node: NodeIndex, - replica_index: Option, + reader_index: usize, state: Option>, } @@ -47,19 +47,19 @@ impl Clone for Reader { streamers: self.streamers.clone(), state: self.state.clone(), for_node: self.for_node, - replica_index: self.replica_index, + reader_index: self.reader_index, } } } impl Reader { - pub fn new(for_node: NodeIndex, replica_index: Option) -> Self { + pub fn new(for_node: NodeIndex, reader_index: usize) -> Self { Reader { writer: None, streamers: Vec::new(), state: None, for_node, - replica_index, + reader_index, } } @@ -69,8 +69,8 @@ impl Reader { self.for_node } - pub fn replica_index(&self) -> Option { - self.replica_index + pub fn reader_index(&self) -> usize { + self.reader_index } #[allow(dead_code)] @@ -89,7 +89,7 @@ impl Reader { streamers: mem::replace(&mut self.streamers, Vec::new()), state: self.state.clone(), for_node: self.for_node, - replica_index: self.replica_index, + reader_index: self.reader_index, } } diff --git a/noria-server/src/controller/builder.rs b/noria-server/src/controller/builder.rs index fa291df51..b621192aa 100644 --- a/noria-server/src/controller/builder.rs +++ b/noria-server/src/controller/builder.rs @@ -57,7 +57,7 @@ impl ControllerBuilder { self.config.sharding = shards; } - /// Set number of reader replicas + /// Set reader replication factor pub fn set_replicas(&mut self, replicas: usize) { self.config.replicas = replicas; } diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 43d2e6053..b34e8e21a 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -720,8 +720,8 @@ impl ControllerInner { } /// Obtain a `ViewBuilder` that can be sent to a client and then used to query a given - /// (already maintained) reader node. If the reader has multiple replicas, a `ViewBuilder` - /// is returned for each replica in round robin order. + /// (already maintained) reader node. If there are multiple readers for any given query, + /// a `ViewBuilder` is returned for each reader in round robin order. /// /// `name` is the name of the query. pub fn view_builder(&mut self, name: &str) -> Option { @@ -736,14 +736,14 @@ impl ControllerInner { *ni } else { // depending on how the graph was constructed, the outputs may be suffixed - // with the replica index. + // with the reader index. let reader_name = format!("{}_0", name); *self.outputs().get(&reader_name)? } } }; - self.ingredients[node].next_replica().map(|ni| { + self.ingredients[node].next_reader().map(|ni| { let name = self.ingredients[ni].name().to_string(); let domain = self.ingredients[ni].domain(); let columns = self.ingredients[ni].fields().to_vec(); @@ -755,7 +755,7 @@ impl ControllerInner { "creating view builder"; "name" => &name, "node_index" => ni.index(), - "replica_index" => self.ingredients[ni].replica_index().unwrap(), + "reader_index" => self.ingredients[ni].with_reader(|r| r.reader_index()).unwrap(), ); ViewBuilder { @@ -1124,10 +1124,11 @@ impl ControllerInner { .count() > 0 { - // This query leaf node has children -- typically, these are readers, but they can - // also be egress nodes or other, dependent queries. If the reader has replicas, we - // are looking for a single egress node that connects the leaf to replicas in other - // domains. + // This query leaf node has children -- egress nodes or other, dependent queries. + // Typically, we are looking for a single egress node that connects the leaf to + // readers in other domains. + // TODO: if each reader (including the 1st) definitely gets its own domain, this + // code can be a lot simpler. let mut has_other_children = false; let readers: Vec<_> = self .ingredients @@ -1172,13 +1173,13 @@ impl ControllerInner { ); } } else { - // The reader had replicas, which will all be removed along with the egress node. + // The remaining readers will all be removed along with the egress node. } } match egress_node { Some(ni) => { - // If there's an egress node, that means the reader replicas are on different + // If there's an egress node, that means the remaining readers are on different // domains. Remove all those leaf nodes as well. let mut bfs = Bfs::new(&self.ingredients, ni); while let Some(child) = bfs.next(&self.ingredients) { diff --git a/noria-server/src/controller/migrate/assignment.rs b/noria-server/src/controller/migrate/assignment.rs index e544dc77d..1ea32b5e9 100644 --- a/noria-server/src/controller/migrate/assignment.rs +++ b/noria-server/src/controller/migrate/assignment.rs @@ -110,8 +110,8 @@ pub fn assign( }; } - // replicas are always in their own domain to distribute the load. - if let Some(_) = n.replica_index() { + // readers are always in their own domain to distribute the load. + if n.is_reader() { return next_domain(); } diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index 8139f4bca..9146cf19b 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -209,16 +209,8 @@ impl<'a> Migration<'a> { if !self.readers.contains_key(&n) { let mut readers = Vec::with_capacity(num_replicas); for i in 0..num_replicas { - let replica_i = { - if num_replicas == 0 { - None - } else { - Some(i) - } - }; - // Make a reader node - let r = node::special::Reader::new(n, replica_i); + let r = node::special::Reader::new(n, i); let r = if let Some(name) = name.clone() { self.mainline.ingredients[n].named_mirror(r, format!("{}_{}", name, i)) } else { @@ -228,12 +220,12 @@ impl<'a> Migration<'a> { // Add it to the graph along with an edge to the node it reads for let r = self.mainline.ingredients.add_node(r); self.mainline.ingredients.add_edge(n, r, ()); - self.mainline.ingredients[n].add_replica(r); + self.mainline.ingredients[n].add_reader(r); info!(self.log, "adding reader node"; "node" => r.index(), "for_node" => n.index(), - "replica_index" => replica_i + "reader_index" => i ); readers.push(r); } @@ -351,11 +343,11 @@ impl<'a> Migration<'a> { } /// Places the domains and their nodes on workers according to the round robin iterator. This - /// is used to ensure, for example, that replicas of the same reader and shards of the same + /// is used to ensure, for example, that readers for the same view and shards of the same /// node end up on different workers. It is also currently used to approximately distribute /// domains across the remaining workers equally. In the future, we'd like to place the domain /// on the machine with the fewest domains already assigned. This does not take into account - /// replicas and shards that were NOT created for their very first time. + /// readers and shards that were NOT created for their very first time. /// /// Domains are placed round robin in the order that they are provided. fn place_round_robin( @@ -542,11 +534,11 @@ impl<'a> Migration<'a> { }); // Since we're using a round robin iterator, obtain a vec of DomainIndexes in the order - // that we want to assign them to workers. For replicas, all replicas for the same view - // should end up on different workers. For non-replicas, it doesn't really matter. - let mut changed_domains_replicas = Vec::new(); + // that we want to assign them to workers. For readers, all readers for the same view + // should end up on different workers. For non-readers, it doesn't really matter. + let mut changed_domains_readers = Vec::new(); for (_, readers) in &self.readers { - changed_domains_replicas.extend( + changed_domains_readers.extend( readers .iter() .map(|&ni| mainline.ingredients[ni].domain()) @@ -557,23 +549,23 @@ impl<'a> Migration<'a> { .map(|&&ni| mainline.ingredients[ni].domain()) .collect::>() .into_iter() - .filter(|&domain| !changed_domains_replicas.contains(&domain)) + .filter(|&domain| !changed_domains_readers.contains(&domain)) .collect::>(); debug!( log, "found changed domains"; - "replica domains" => format!("{:?}", changed_domains_replicas), + "reader domains" => format!("{:?}", changed_domains_readers), "other domains" => format!("{:?}", changed_domains_other), ); - // Check invariants on the (non)-replica data structures. Each changed replica domain - // should be just created. The domain should contain the reader node and its ingress node, + // Check invariants on the changed reader domains. Each changed reader domain should be + // just created. The domain should contain the reader node and its ingress node, // and no other nodes. Also, each new reader node should be in a unique domain. assert_eq!( - changed_domains_replicas.len(), - changed_domains_replicas.iter().collect::>().len() + changed_domains_readers.len(), + changed_domains_readers.iter().collect::>().len() ); - for domain in &changed_domains_replicas { + for domain in &changed_domains_readers { assert!(!mainline.domains.contains_key(&domain)); let nodes: &Vec<_> = uninformed_domain_nodes.get(&domain).unwrap(); assert_eq!(nodes.len(), 2); @@ -593,7 +585,7 @@ impl<'a> Migration<'a> { mainline, &log, &mut uninformed_domain_nodes, - &changed_domains_replicas, + &changed_domains_readers, ); Self::place_round_robin( mainline, diff --git a/noria-server/src/integration.rs b/noria-server/src/integration.rs index a0cd46a68..a1e1ce5be 100644 --- a/noria-server/src/integration.rs +++ b/noria-server/src/integration.rs @@ -2045,7 +2045,7 @@ fn recipe_activates_and_migrates() { g.extend_recipe(r1_txt).unwrap(); // still one base node assert_eq!(g.inputs().unwrap().len(), 1); - // two leaf nodes * number of replicas + // two leaf nodes * replication factor assert_eq!(g.outputs().unwrap().len(), 2 * DEFAULT_REPLICAS); } @@ -2065,7 +2065,7 @@ fn recipe_activates_and_migrates_with_join() { // still two base nodes assert_eq!(g.inputs().unwrap().len(), 2); - // one leaf node * number of replicas + // one leaf node * replication factor assert_eq!(g.outputs().unwrap().len(), 1 * DEFAULT_REPLICAS); } @@ -2294,7 +2294,7 @@ fn remove_query() { } #[test] -fn replica_writes() { +fn reader_replica_writes() { let txt = "CREATE TABLE x (a int);\n QUERY q: SELECT a from x;\n"; @@ -2309,7 +2309,7 @@ fn replica_writes() { let mut q2 = g.view("q").unwrap().into_exclusive().unwrap(); let mut q3 = g.view("q").unwrap().into_exclusive().unwrap(); - // These are actually views to different replicas + // These are actually views to different readers assert_eq!(q1.name(), "q_1"); assert_eq!(q2.name(), "q_2"); assert_eq!(q3.name(), "q_0"); @@ -2323,7 +2323,7 @@ fn replica_writes() { mutx.insert(vec![34.into()]).unwrap(); sleep(); - // Writes are reflected in all replicas + // Writes are reflected in all readers assert_eq!(q1.lookup(&[0.into()], true).unwrap().len(), 3); assert_eq!(q2.lookup(&[0.into()], true).unwrap().len(), 3); assert_eq!(q3.lookup(&[0.into()], true).unwrap().len(), 3); diff --git a/noria-server/src/main.rs b/noria-server/src/main.rs index a385ac542..632194b5d 100644 --- a/noria-server/src/main.rs +++ b/noria-server/src/main.rs @@ -116,7 +116,7 @@ fn main() { .long("replicas") .takes_value(true) .default_value("1") - .help("Each reader has this many replicas (at least 1)."), + .help("The reader replication factor (at least 1)."), ) .arg( Arg::with_name("verbose") diff --git a/noria/src/controller.rs b/noria/src/controller.rs index 8117bf3b8..f203742e4 100644 --- a/noria/src/controller.rs +++ b/noria/src/controller.rs @@ -210,7 +210,8 @@ impl ControllerHandle { } /// Obtain a `View` that allows you to query the given external view. If the view has - /// replicas, returns a new `View` replica in round robin order each time the api is called. + /// multiple reader replicas, returns a new `View` reader replica in round robin order + /// each time the api is called. pub fn view(&mut self, name: &str) -> Result { // This call attempts to detect if this function is being called in a loop. If this is // getting false positives, then it is safe to increase the allowed hit count, however, the From 7a577c2720318be802922bccaa5a84e5dc073725 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 13 Nov 2018 15:01:37 -0500 Subject: [PATCH 35/41] Naming, comments, and Rust things --- noria-server/dataflow/src/node/mod.rs | 2 +- .../dataflow/src/node/special/reader.rs | 1 + noria-server/src/controller/builder.rs | 4 +-- noria-server/src/controller/inner.rs | 4 +-- noria-server/src/controller/migrate/mod.rs | 25 ++++++++++--------- noria-server/src/controller/mod.rs | 6 ++--- noria-server/src/integration.rs | 4 +-- noria-server/src/main.rs | 8 +++--- 8 files changed, 28 insertions(+), 26 deletions(-) diff --git a/noria-server/dataflow/src/node/mod.rs b/noria-server/dataflow/src/node/mod.rs index da5d01447..318fda90b 100644 --- a/noria-server/dataflow/src/node/mod.rs +++ b/noria-server/dataflow/src/node/mod.rs @@ -110,7 +110,7 @@ impl Node { self.sharded_by = s; } - /// The node being added is a reader for this node. + /// The node index `ni` being added is the index of a reader for this node. pub fn add_reader(&mut self, ni: NodeIndex) { self.readers.push(ni) } diff --git a/noria-server/dataflow/src/node/special/reader.rs b/noria-server/dataflow/src/node/special/reader.rs index 3ebc2b923..10d061be6 100644 --- a/noria-server/dataflow/src/node/special/reader.rs +++ b/noria-server/dataflow/src/node/special/reader.rs @@ -35,6 +35,7 @@ pub struct Reader { streamers: Vec>>, for_node: NodeIndex, + /// The index of this Reader in the list of Readers for its ancestor reader_index: usize, state: Option>, } diff --git a/noria-server/src/controller/builder.rs b/noria-server/src/controller/builder.rs index b621192aa..086b211e2 100644 --- a/noria-server/src/controller/builder.rs +++ b/noria-server/src/controller/builder.rs @@ -58,8 +58,8 @@ impl ControllerBuilder { } /// Set reader replication factor - pub fn set_replicas(&mut self, replicas: usize) { - self.config.replicas = replicas; + pub fn set_replication_factor(&mut self, replication_factor: usize) { + self.config.replication_factor = replication_factor; } /// Set how many workers the controller should wait for before starting. More workers can join diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index b34e8e21a..fecd16a2e 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -36,7 +36,7 @@ pub struct ControllerInner { pub(super) source: NodeIndex, pub(super) ndomains: usize, pub(super) sharding: Option, - pub(super) replicas: usize, + pub(super) replication_factor: usize, pub(super) domain_config: DomainConfig, @@ -434,7 +434,7 @@ impl ControllerInner { materializations, sharding: state.config.sharding, - replicas: state.config.replicas, + replication_factor: state.config.replication_factor, domain_config: state.config.domain_config, persistence: state.config.persistence, heartbeat_every: state.config.heartbeat_every, diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index 9146cf19b..bd07f187c 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -205,13 +205,13 @@ impl<'a> Migration<'a> { self.mainline.graph() } - fn ensure_reader_for(&mut self, n: NodeIndex, name: Option, num_replicas: usize) { + fn ensure_reader_for(&mut self, n: NodeIndex, name: Option, replication_factor: usize) { if !self.readers.contains_key(&n) { - let mut readers = Vec::with_capacity(num_replicas); - for i in 0..num_replicas { + let mut readers = Vec::with_capacity(replication_factor); + for i in 0..replication_factor { // Make a reader node let r = node::special::Reader::new(n, i); - let r = if let Some(name) = name.clone() { + let r = if let Some(name) = &name { self.mainline.ingredients[n].named_mirror(r, format!("{}_{}", name, i)) } else { self.mainline.ingredients[n].mirror(r) @@ -239,7 +239,7 @@ impl<'a> Migration<'a> { /// To query into the maintained state, use `ControllerInner::get_getter`. #[cfg(test)] pub fn maintain_anonymous(&mut self, n: NodeIndex, key: &[usize]) -> Vec { - self.ensure_reader_for(n, None, self.mainline.replicas); + self.ensure_reader_for(n, None, self.mainline.replication_factor); let ris = &self.readers[&n]; for ri in ris { @@ -255,7 +255,7 @@ impl<'a> Migration<'a> { /// /// To query into the maintained state, use `ControllerInner::get_getter`. pub fn maintain(&mut self, name: String, n: NodeIndex, key: &[usize]) { - self.ensure_reader_for(n, Some(name), self.mainline.replicas); + self.ensure_reader_for(n, Some(name), self.mainline.replication_factor); let ris = &self.readers[&n]; @@ -269,11 +269,11 @@ impl<'a> Migration<'a> { fn assign_local_addresses( mainline: &'a mut ControllerInner, log: &slog::Logger, - sorted_new: &Vec<&NodeIndex>, + sorted_new: &Vec, swapped: &HashMap<(NodeIndex, NodeIndex), NodeIndex>) { let domain_new_nodes = sorted_new .iter() - .map(|&&ni| (mainline.ingredients[ni].domain(), ni)) + .map(|&ni| (mainline.ingredients[ni].domain(), ni)) .fold(HashMap::new(), |mut dns, (d, ni)| { dns.entry(d).or_insert_with(Vec::new).push(ni); dns @@ -490,10 +490,11 @@ impl<'a> Migration<'a> { let swapped = swapped0; // Assign local addresses to all new nodes, and initialize them. - let mut sorted_new = new + let mut sorted_new: Vec = new .iter() - .filter(|&&ni| ni != mainline.source) - .filter(|&&ni| !mainline.ingredients[ni].is_dropped()) + .map(|&ni| ni) + .filter(|&ni| ni != mainline.source) + .filter(|&ni| !mainline.ingredients[ni].is_dropped()) .collect::>(); sorted_new.sort(); @@ -546,7 +547,7 @@ impl<'a> Migration<'a> { } let changed_domains_other = sorted_new .iter() - .map(|&&ni| mainline.ingredients[ni].domain()) + .map(|&ni| mainline.ingredients[ni].domain()) .collect::>() .into_iter() .filter(|&domain| !changed_domains_readers.contains(&domain)) diff --git a/noria-server/src/controller/mod.rs b/noria-server/src/controller/mod.rs index 1c86c5990..11e9a35f4 100644 --- a/noria-server/src/controller/mod.rs +++ b/noria-server/src/controller/mod.rs @@ -104,7 +104,7 @@ where #[derive(Clone, Serialize, Deserialize, PartialEq)] pub(crate) struct ControllerConfig { pub sharding: Option, - pub replicas: usize, + pub replication_factor: usize, pub partial_enabled: bool, pub domain_config: DomainConfig, pub persistence: PersistenceParameters, @@ -122,9 +122,9 @@ impl Default for ControllerConfig { #[cfg(not(test))] sharding: None, #[cfg(test)] - replicas: 3, + replication_factor: 3, #[cfg(not(test))] - replicas: 1, + replication_factor: 1, partial_enabled: true, domain_config: DomainConfig { concurrent_replays: 512, diff --git a/noria-server/src/integration.rs b/noria-server/src/integration.rs index a1e1ce5be..1e8fcefa4 100644 --- a/noria-server/src/integration.rs +++ b/noria-server/src/integration.rs @@ -49,7 +49,7 @@ pub fn build_local_logging(prefix: &str) -> LocalControllerHandle, - replicas: usize, + replication_factor: usize, log: bool, ) -> LocalControllerHandle { use crate::logger_pls; @@ -59,7 +59,7 @@ fn build( } builder.set_sharding(sharding); builder.set_persistence(get_persistence_params(prefix)); - builder.set_replicas(replicas); + builder.set_replication_factor(replication_factor); builder.build_local().unwrap() } diff --git a/noria-server/src/main.rs b/noria-server/src/main.rs index 632194b5d..4e099ba62 100644 --- a/noria-server/src/main.rs +++ b/noria-server/src/main.rs @@ -112,8 +112,8 @@ fn main() { .help("Number of pool threads to use (0 = #cores)"), ) .arg( - Arg::with_name("replicas") - .long("replicas") + Arg::with_name("replication_factor") + .long("replication_factor") .takes_value(true) .default_value("1") .help("The reader replication factor (at least 1)."), @@ -145,7 +145,7 @@ fn main() { 0 => None, x => Some(x), }; - let replicas = value_t_or_exit!(matches, "replicas", usize); + let replication_factor = value_t_or_exit!(matches, "replication_factor", usize); let verbose = matches.is_present("verbose"); let deployment_name = matches.value_of("deployment").unwrap(); @@ -160,7 +160,7 @@ fn main() { if let Some(threads) = threads { builder.set_threads(threads); } - builder.set_replicas(replicas); + builder.set_replication_factor(replication_factor); builder.set_quorum(quorum); if matches.is_present("nopartial") { builder.disable_partial(); From aa7435b05bd72f5d970593a5e39ae37f70cc3a76 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 13 Nov 2018 15:14:37 -0500 Subject: [PATCH 36/41] Store reader index instead of name in view --- noria-server/src/controller/inner.rs | 5 +++-- noria-server/src/integration.rs | 6 +++--- noria/src/view.rs | 18 +++++++++--------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index fecd16a2e..0bf4dc67b 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -750,16 +750,17 @@ impl ControllerInner { let shards = (0..self.domains[&domain].shards()) .map(|i| self.read_addrs[&self.domains[&domain].assignment(i)].clone()) .collect(); + let reader_index = self.ingredients[ni].with_reader(|r| r.reader_index()).unwrap(); info!( self.log, "creating view builder"; "name" => &name, "node_index" => ni.index(), - "reader_index" => self.ingredients[ni].with_reader(|r| r.reader_index()).unwrap(), + "reader_index" => reader_index, ); ViewBuilder { - name, + reader_index, local_ports: vec![], node: ni, columns, diff --git a/noria-server/src/integration.rs b/noria-server/src/integration.rs index 1e8fcefa4..cd0fd0812 100644 --- a/noria-server/src/integration.rs +++ b/noria-server/src/integration.rs @@ -2310,9 +2310,9 @@ fn reader_replica_writes() { let mut q3 = g.view("q").unwrap().into_exclusive().unwrap(); // These are actually views to different readers - assert_eq!(q1.name(), "q_1"); - assert_eq!(q2.name(), "q_2"); - assert_eq!(q3.name(), "q_0"); + assert_eq!(q1.reader_index(), 1); + assert_eq!(q2.reader_index(), 2); + assert_eq!(q3.reader_index(), 0); assert_eq!(q1.lookup(&[0.into()], true).unwrap().len(), 0); assert_eq!(q2.lookup(&[0.into()], true).unwrap().len(), 0); diff --git a/noria/src/view.rs b/noria/src/view.rs index 068e44950..00bddd3be 100644 --- a/noria/src/view.rs +++ b/noria/src/view.rs @@ -59,7 +59,7 @@ pub enum ReadReply { #[doc(hidden)] #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ViewBuilder { - pub name: String, + pub reader_index: usize, pub node: NodeIndex, pub columns: Vec, pub shards: Vec, @@ -77,7 +77,7 @@ impl ViewBuilder { .collect::>>()?; Ok(View { - name: self.name, + reader_index: self.reader_index, node: self.node, columns: self.columns, shard_addrs: self.shards, @@ -126,7 +126,7 @@ impl ViewBuilder { .collect::>>()?; Ok(View { - name: self.name, + reader_index: self.reader_index, node: self.node, columns: self.columns, shard_addrs: self.shards, @@ -143,7 +143,7 @@ impl ViewBuilder { /// get a handle that can be sent to a different thread (i.e., one with its own dedicated /// connections), call `View::into_exclusive`. pub struct View { - name: String, + reader_index: usize, node: NodeIndex, columns: Vec, shards: Vec, @@ -156,7 +156,7 @@ pub struct View { impl Clone for View { fn clone(&self) -> Self { View { - name: self.name.clone(), + reader_index: self.reader_index, node: self.node, columns: self.columns.clone(), shards: self.shards.clone(), @@ -173,7 +173,7 @@ impl View { /// threads. pub fn into_exclusive(self) -> io::Result> { ViewBuilder { - name: self.name, + reader_index: self.reader_index, node: self.node, local_ports: vec![], columns: self.columns, @@ -188,9 +188,9 @@ impl View { allow(clippy::len_without_is_empty) )] impl View { - /// Get the name of the corresponding reader node. - pub fn name(&self) -> &str { - &self.name + /// Get the index of the corresponding Reader in the list of Readers for this view + pub fn reader_index(&self) -> usize { + self.reader_index } /// Get the list of columns in this view. From 232453f9bbd94383af312d432f5a41766f10f725 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 13 Nov 2018 15:56:45 -0500 Subject: [PATCH 37/41] Simplify query node removal with assumption that query node has exactly one egress node as a child --- noria-server/src/controller/inner.rs | 109 ++++++++------------------- 1 file changed, 31 insertions(+), 78 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 0bf4dc67b..52cd67c3a 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -1117,94 +1117,47 @@ impl ControllerInner { leaf.index() ); + // We're looking for a single egress node that connects the query node to readers in + // other domains. let mut nodes = vec![]; - let mut egress_node = None; - if self - .ingredients - .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) - .count() - > 0 - { - // This query leaf node has children -- egress nodes or other, dependent queries. - // Typically, we are looking for a single egress node that connects the leaf to - // readers in other domains. - // TODO: if each reader (including the 1st) definitely gets its own domain, this - // code can be a lot simpler. - let mut has_other_children = false; - let readers: Vec<_> = self + let egress_node = { + let num_children = self .ingredients .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) - .filter(|&ni| { - if self.ingredients[ni].is_reader() { - true - } else { - if egress_node.is_some() || !self.ingredients[ni].is_egress() { - has_other_children = true; - } else { - egress_node = Some(ni); - } - false - } - }) - .collect(); - if has_other_children { - // should never happen, since we remove nodes in reverse topological order + .count(); + if num_children == 1 { + self.ingredients + .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) + .next() + .unwrap() + } else { + // should not happen, since we remove nodes in reverse topological order crit!( self.log, - "not removing node {} yet, as it still has non-reader-related children", - leaf.index() + "not removing node {} yet, as it has unexpected children or none at all", + leaf.index(); + "num_children" => num_children, ); unreachable!(); } - // nodes can have only one reader attached - assert!(readers.len() <= 1); - debug!( - self.log, - "Removing query leaf \"{}\"", self.ingredients[leaf].name(); - "node" => leaf.index(), - ); - if !readers.is_empty() { - for reader in readers { - removals.push(reader); - nodes.push(reader); - debug!( - self.log, "Removing reader"; - "node" => reader.index(), - "leaf" => leaf.index(), - ); - } - } else { - // The remaining readers will all be removed along with the egress node. - } - } + }; - match egress_node { - Some(ni) => { - // If there's an egress node, that means the remaining readers are on different - // domains. Remove all those leaf nodes as well. - let mut bfs = Bfs::new(&self.ingredients, ni); - while let Some(child) = bfs.next(&self.ingredients) { - if self.ingredients - .neighbors_directed(child, petgraph::EdgeDirection::Outgoing) - .count() == 0 - { - removals.push(child); - nodes.push(child); - } - } - debug!( - self.log, "Removing egress node and its children"; - "node" => ni.index(), - "leaf" => leaf.index(), - ); - }, - None => { - // If the start node didn't have any children, it can be removed immediately - if nodes.is_empty() { - nodes.push(leaf); - } - }, + // Remove the egress node and its children + let mut bfs = Bfs::new(&self.ingredients, egress_node); + while let Some(child) = bfs.next(&self.ingredients) { + if self.ingredients + .neighbors_directed(child, petgraph::EdgeDirection::Outgoing) + .count() == 0 + { + removals.push(child); + nodes.push(child); + } } + debug!( + self.log, "Removing egress node and its children"; + "node" => egress_node.index(), + "leaf" => leaf.index(), + ); // The nodes we remove first do not have children any more for node in &nodes { From 85998f28baf4bac683e76a0bf011fac0893db2fb Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 13 Nov 2018 16:17:50 -0500 Subject: [PATCH 38/41] More comments --- noria-server/dataflow/src/node/mod.rs | 3 ++- noria-server/src/controller/inner.rs | 2 +- noria-server/src/controller/migrate/mod.rs | 9 ++++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/noria-server/dataflow/src/node/mod.rs b/noria-server/dataflow/src/node/mod.rs index 318fda90b..fad11e9d8 100644 --- a/noria-server/dataflow/src/node/mod.rs +++ b/noria-server/dataflow/src/node/mod.rs @@ -354,7 +354,8 @@ impl Node { &self.readers[..] } - /// Returns reader replicas in round robin order each time the method is called. + /// Returns reader replicas in round robin order each time the method is called, + /// with the primary reader (reader index = 0) being returned last. pub fn next_reader(&mut self) -> Option { if self.num_readers() > 0 { self.next_reader += 1; diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 52cd67c3a..c966a297a 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -723,7 +723,7 @@ impl ControllerInner { /// (already maintained) reader node. If there are multiple readers for any given query, /// a `ViewBuilder` is returned for each reader in round robin order. /// - /// `name` is the name of the query. + /// `name` is the name of the view the reader is for. pub fn view_builder(&mut self, name: &str) -> Option { // first try to resolve the node via the recipe, which handles aliasing between identical // queries. diff --git a/noria-server/src/controller/migrate/mod.rs b/noria-server/src/controller/migrate/mod.rs index bd07f187c..424b510ec 100644 --- a/noria-server/src/controller/migrate/mod.rs +++ b/noria-server/src/controller/migrate/mod.rs @@ -535,8 +535,9 @@ impl<'a> Migration<'a> { }); // Since we're using a round robin iterator, obtain a vec of DomainIndexes in the order - // that we want to assign them to workers. For readers, all readers for the same view - // should end up on different workers. For non-readers, it doesn't really matter. + // that we want to assign them to workers. For readers, we have to specifically list + // readers for the same node in consecutive order to ensure their respective domains end + // up on different workers. For non-readers, the order doesn't actually matter. let mut changed_domains_readers = Vec::new(); for (_, readers) in &self.readers { changed_domains_readers.extend( @@ -580,7 +581,9 @@ impl<'a> Migration<'a> { ); } - // Boot up new domains (they'll ignore all updates for now) + // Boot up new domains (they'll ignore all updates for now). + // We call `place_round_robin` twice to distinguish the changed domains for which the order + // matters (readers) from the changed domains for which it doesn't, as stated above. debug!(log, "booting new domains"); Self::place_round_robin( mainline, From d7c93f5d59250c502857307857ec382341d4891f Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Wed, 21 Nov 2018 14:28:08 -0500 Subject: [PATCH 39/41] It is ok to remove leaf nodes with no children --- noria-server/src/controller/inner.rs | 44 ++++++++++++++++------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index c966a297a..658e91e3a 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -997,7 +997,7 @@ impl ControllerInner { topo_removals.reverse(); for leaf in topo_removals { - self.remove_query_node(leaf)?; + self.remove_leaf(leaf)?; } // now remove bases @@ -1106,7 +1106,7 @@ impl ControllerInner { graphviz(&self.ingredients, detailed, &self.materializations) } - fn remove_query_node(&mut self, leaf: NodeIndex) -> Result<(), String> { + fn remove_leaf(&mut self, leaf: NodeIndex) -> Result<(), String> { let mut removals = vec![]; let start = leaf; assert!(!self.ingredients[leaf].is_source()); @@ -1125,16 +1125,20 @@ impl ControllerInner { .ingredients .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) .count(); - if num_children == 1 { - self.ingredients + if num_children == 0 { + None + } else if num_children == 1 { + let child = self.ingredients .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) .next() - .unwrap() + .unwrap(); + assert!(self.ingredients[child].is_egress()); + Some(child) } else { // should not happen, since we remove nodes in reverse topological order crit!( self.log, - "not removing node {} yet, as it has unexpected children or none at all", + "not removing node {} yet, as it still has non-reader-related children", leaf.index(); "num_children" => num_children, ); @@ -1143,21 +1147,23 @@ impl ControllerInner { }; // Remove the egress node and its children - let mut bfs = Bfs::new(&self.ingredients, egress_node); - while let Some(child) = bfs.next(&self.ingredients) { - if self.ingredients - .neighbors_directed(child, petgraph::EdgeDirection::Outgoing) - .count() == 0 - { - removals.push(child); - nodes.push(child); + if let Some(egress_node) = egress_node { + let mut bfs = Bfs::new(&self.ingredients, egress_node); + while let Some(child) = bfs.next(&self.ingredients) { + if self.ingredients + .neighbors_directed(child, petgraph::EdgeDirection::Outgoing) + .count() == 0 + { + removals.push(child); + nodes.push(child); + } } + debug!( + self.log, "Removing egress node and its children"; + "node" => egress_node.index(), + "leaf" => leaf.index(), + ); } - debug!( - self.log, "Removing egress node and its children"; - "node" => egress_node.index(), - "leaf" => leaf.index(), - ); // The nodes we remove first do not have children any more for node in &nodes { From 8b0fba5823e995971e9325c78b20200ba083936c Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Thu, 22 Nov 2018 15:28:02 -0500 Subject: [PATCH 40/41] Fix localsoup --- noria-benchmarks/vote/clients/localsoup/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noria-benchmarks/vote/clients/localsoup/graph.rs b/noria-benchmarks/vote/clients/localsoup/graph.rs index 10cef0256..38f8256e2 100644 --- a/noria-benchmarks/vote/clients/localsoup/graph.rs +++ b/noria-benchmarks/vote/clients/localsoup/graph.rs @@ -71,7 +71,7 @@ impl Setup { Graph { vote: inputs["Vote"], article: inputs["Article"], - end: outputs["ArticleWithVoteCount"], + end: outputs["ArticleWithVoteCount_0"], stupid: self.stupid, graph, } From 5cb51d26906ef17bddcf90b621625cfb92beea83 Mon Sep 17 00:00:00 2001 From: Gina Yuan Date: Tue, 8 Jan 2019 15:31:40 -0500 Subject: [PATCH 41/41] Stylistic changes for remove_leaf --- noria-server/src/controller/inner.rs | 57 ++++++++++------------------ 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/noria-server/src/controller/inner.rs b/noria-server/src/controller/inner.rs index 8a1335a39..f2c8cb4ee 100644 --- a/noria-server/src/controller/inner.rs +++ b/noria-server/src/controller/inner.rs @@ -1145,35 +1145,19 @@ impl ControllerInner { // We're looking for a single egress node that connects the query node to readers in // other domains. let mut nodes = vec![]; - let egress_node = { - let num_children = self - .ingredients + let num_children = self + .ingredients + .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) + .count(); + if num_children == 1 { + let child = self.ingredients .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) - .count(); - if num_children == 0 { - None - } else if num_children == 1 { - let child = self.ingredients - .neighbors_directed(leaf, petgraph::EdgeDirection::Outgoing) - .next() - .unwrap(); - assert!(self.ingredients[child].is_egress()); - Some(child) - } else { - // should not happen, since we remove nodes in reverse topological order - crit!( - self.log, - "not removing node {} yet, as it still has non-reader-related children", - leaf.index(); - "num_children" => num_children, - ); - unreachable!(); - } - }; + .next() + .unwrap(); + assert!(self.ingredients[child].is_egress()); - // Remove the egress node and its children - if let Some(egress_node) = egress_node { - let mut bfs = Bfs::new(&self.ingredients, egress_node); + // Remove the egress node and its children + let mut bfs = Bfs::new(&self.ingredients, child); while let Some(child) = bfs.next(&self.ingredients) { if self.ingredients .neighbors_directed(child, petgraph::EdgeDirection::Outgoing) @@ -1185,19 +1169,18 @@ impl ControllerInner { } debug!( self.log, "Removing egress node and its children"; - "node" => egress_node.index(), + "node" => child.index(), "leaf" => leaf.index(), ); - } - - // The nodes we remove first do not have children any more - for node in &nodes { - assert_eq!( - self.ingredients - .neighbors_directed(*node, petgraph::EdgeDirection::Outgoing) - .count(), - 0 + } else if num_children > 1 { + // should not happen, since we remove nodes in reverse topological order + crit!( + self.log, + "not removing node {} yet, as it still has non-reader-related children", + leaf.index(); + "num_children" => num_children, ); + unreachable!(); } while let Some(node) = nodes.pop() {