diff --git a/examples/mnist.d b/examples/mnist.d
index 4b9ae3d..9065537 100644
--- a/examples/mnist.d
+++ b/examples/mnist.d
@@ -24,7 +24,7 @@ auto load_data()
     import std.typecons;
     if(!exists(data_dir))
     {
-        auto root_url = "http://yann.lecun.com/exdb/mnist/"; 
+        auto root_url = "http://yann.lecun.com/exdb/mnist/";
         mkdir(data_dir);
         import std.net.curl;
         import std.process;
diff --git a/examples/rcv1.d b/examples/rcv1.d
index f1f5427..d87ac62 100644
--- a/examples/rcv1.d
+++ b/examples/rcv1.d
@@ -39,7 +39,7 @@ auto load_data()
     {
         auto root_url = "http://ae.nflximg.net/vectorflow/";
         auto url_data = root_url ~ "lyrl2004_vectors_";
-        auto url_topics = root_url ~ "rcv1v2.topics.qrels.gz"; 
+        auto url_topics = root_url ~ "rcv1v2.topics.qrels.gz";
         mkdir(data_dir);
         import std.net.curl;
         import std.process;
@@ -135,7 +135,7 @@ class RCV1Reader : DataFileReader!(Obs) {
             auto feat_val = to_float(t[feat_id_end+1..$]);
             features_buff[cnt++] = SparseF(feat_id, feat_val);
         }
-        
+
         _obs.features = features_buff[0..cnt];
         return true;
     }
diff --git a/src/vectorflow/dataset.d b/src/vectorflow/dataset.d
index bcada86..b23d49e 100644
--- a/src/vectorflow/dataset.d
+++ b/src/vectorflow/dataset.d
@@ -1,6 +1,6 @@
 /**
  This module provides utility classes to iterate over data.
- 
+
  It is not mandatory to use them when using vectorflow, but you might find them
  useful and slightly more intuitive to use than the built-in range mechanism if
  you're a beginner with D.
@@ -12,7 +12,7 @@
  std.range.evenChunks, which might or might not work depending on your
  specific reader. To explicitly shard the data, just specify an `evenChunks`
  function in your reader implementation (see MultiFilesReader for an example).
- 
+
  Copyright: 2017 Netflix, Inc.
  License: $(LINK2 http://www.apache.org/licenses/LICENSE-2.0, Apache License Version 2.0)
  */
@@ -110,7 +110,7 @@ class MultiFilesReader(T)
                     result = dg(obs);
                     if(result)
                         break;
-                }                
+                }
             }
         }
         rewind();
diff --git a/src/vectorflow/layers.d b/src/vectorflow/layers.d
index 9291e9b..c00d486 100644
--- a/src/vectorflow/layers.d
+++ b/src/vectorflow/layers.d
@@ -90,7 +90,7 @@ class Linear : NeuralLayer {
                     case LayerT.DENSE:
                         dp += dotProd(row[offset..offset+l.dim_out], l.out_d);
                         break;
-                    
+
                     case LayerT.SPARSE:
                         foreach(ref f; l.out_s)
                             dp += row[offset + f.id] * f.val;
@@ -107,7 +107,7 @@ class Linear : NeuralLayer {
         // gradient
         foreach(k; 0..dim_out)
         {
-            auto row = grad[k]; 
+            auto row = grad[k];
             float g = ext_grad[k];
             _accumulate_grad_row(row, g, k);
         }
@@ -147,7 +147,7 @@ class Linear : NeuralLayer {
             foreach(ref SparseF fg; ext_grad)
                 axpy(fg.val, W[fg.id][offset..offset+b.length], b);
             offset += b.length;
-        }   
+        }
     }
 
     final protected void _accumulate_grad_row(float[] row, float g, ulong index)
@@ -166,7 +166,7 @@ class Linear : NeuralLayer {
             }
             offset += l.dim_out;
         }
-        row[0] += _with_intercept * g;        
+        row[0] += _with_intercept * g;
     }
 
     override void serialize(Serializer s)
@@ -193,7 +193,7 @@ class Linear : NeuralLayer {
             cp.priors ~= p.dup;
         if(prox !is null)
             cp.prox = prox.dup;
-              
+
         return cp;
     }
 
@@ -262,7 +262,7 @@ class DropOut : NeuralLayer {
         auto all_sparse = parents.all!(x => x.type == LayerT.SPARSE);
         if(!all_dense && !all_sparse)
             throw new Exception(
-                "DropOut layer parents have all to be of the same kind " ~ 
+                "DropOut layer parents have all to be of the same kind " ~
                 "(sparse or dense outputs).");
         if(all_dense)
         {
@@ -309,7 +309,7 @@ class DropOut : NeuralLayer {
             offset += p.dim_out;
         }
     }
-    
+
     void _predict_train_sparse()
     {
         // @TODO: this is currently very slow because of allocations
@@ -709,7 +709,7 @@ class SparseKernelExpander : InputLayer
     {
         super(dim_out, LayerT.SPARSE);
         _learnable = false;
-        
+
         if(max_group_id > ushort.max)
             throw new Exception(
                 "Doesn't support group ids bigger than %d".format(ushort.max));
@@ -925,7 +925,7 @@ class DenseData : Data!(LayerT.DENSE)
     {
         super(dim_out);
     }
-    mixin opCallNew; 
+    mixin opCallNew;
 }
 
 /**
diff --git a/src/vectorflow/losses.d b/src/vectorflow/losses.d
index b6b667f..a32dae5 100644
--- a/src/vectorflow/losses.d
+++ b/src/vectorflow/losses.d
@@ -1,7 +1,7 @@
 /**
  The library supports already implemented loss functions, as well as
  a callback-based way to specify a custom loss.
- 
+
  The losses pre-implemented are: `logistic`, `square`, `multinomial`.
 
  For these losses, if an attribute `.weight` is found in the row, it will
@@ -15,7 +15,7 @@
  `S` is `void` or numeric (float, double, int...).
     If numeric, the callback is expected to return the loss value on
     training sample `ex` for monitoring purposes.
-    
+
  `R` is `float[]` or `NeuralNet`. If `float[]`, the net is expected to have
     a single leaf and the callback receives the predictions of the leaf after
     forward-prop. If `NeuralNet`, the callback receives a reference of the net
@@ -40,7 +40,7 @@
         grads[0] = 1.0f;
     else
         grads[0] = -1.0f;
-    
+
     return fabs(pred - o.label); // return loss value so it's monitored
     // during training
  }
diff --git a/src/vectorflow/monitor.d b/src/vectorflow/monitor.d
index dc38531..684a019 100644
--- a/src/vectorflow/monitor.d
+++ b/src/vectorflow/monitor.d
@@ -70,7 +70,7 @@ class SGDMonitor {
         _buff_stdout_line = new char[240];
 
         if(with_loss_)
-            _pattern = 
+            _pattern =
             "Progress: %s | Elapsed: %s | Remaining: %s | %04d passes " ~
             "| Loss: %.4e | %.2e obs/sec | %.2e features/sec";
         else
diff --git a/src/vectorflow/neurallayer.d b/src/vectorflow/neurallayer.d
index e7eab5c..a18ac27 100644
--- a/src/vectorflow/neurallayer.d
+++ b/src/vectorflow/neurallayer.d
@@ -86,7 +86,7 @@ abstract class NeuralLayer {
                 backgrads ~= new float[p.dim_out];
             else
                 backgrads ~= new float[0];
-        }        
+        }
     }
     void allocate_params(){}
     void allocate_grad_params(){}
@@ -95,7 +95,7 @@ abstract class NeuralLayer {
     final @property optimizer_set(){return optimizer !is null;}
 
     this(){}
-    
+
     this(ulong dim_out_, LayerT type_)
     {
         dim_out = dim_out_.to!size_t;
@@ -174,7 +174,7 @@ abstract class NeuralLayer {
         dim_in = s.read!ulong().to!size_t;
         dim_out = s.read!ulong().to!size_t;
         type = s.read!string().to!LayerT;
-        
+
         deserialize(s);
     }
 
@@ -239,7 +239,7 @@ abstract class InputLayer : NeuralLayer
     }
 
     abstract override void predict();
-    
+
     override void accumulate_grad(V)(V[] grad) pure
         if ((is(V == float) || is(V == SparseF))) {}
 
diff --git a/src/vectorflow/neuralnet.d b/src/vectorflow/neuralnet.d
index 41e5d1e..80e31e1 100644
--- a/src/vectorflow/neuralnet.d
+++ b/src/vectorflow/neuralnet.d
@@ -137,7 +137,7 @@ class NeuralNet {
             if(layer.name == "")
                 layer.name = generate_name();
             if(layer.name in layers_map)
-                throw new Exception("A layer with the name `" ~ 
+                throw new Exception("A layer with the name `" ~
                         layer.name ~ "` already exist.");
             layers_map[layer.name] = layer;
             layers ~= layer;
@@ -200,7 +200,7 @@ class NeuralNet {
     *        which have the types expected by the roots in proper order
     *        (i.e: float[], SparseF[], SparseFG[], custom roots types...)
     *
-    * Returns: array of last layer neurons values 
+    * Returns: array of last layer neurons values
     *
     * Example:
     * ---
@@ -278,7 +278,7 @@ class NeuralNet {
     *    child = destination layer
     *    with_alloc = whether or not both layers should allocate internal
     *                 parameters
-    */    
+    */
     void wire(NeuralLayer parent, NeuralLayer child, bool with_alloc = true)
     {
         check_layer_here(parent.name);
@@ -316,7 +316,7 @@ class NeuralNet {
             else
                 leaves = leaves[0..ind_leaf] ~ leaves[ind_leaf+1..$];
         }
-        
+
         optimize_graph(this);
     }
 
@@ -335,7 +335,7 @@ class NeuralNet {
     *
     * Params:
     *    rand_scale = parameters values drawn in ]-rand_scale, rand_scale[
-    */ 
+    */
     void initialize(double rand_scale)
     {
         _ever_initialized = true;
diff --git a/src/vectorflow/optimizers.d b/src/vectorflow/optimizers.d
index 929ca9d..ea611b3 100644
--- a/src/vectorflow/optimizers.d
+++ b/src/vectorflow/optimizers.d
@@ -88,7 +88,7 @@ class SGDOptimizer : Optimizer {
 
         auto monitor = new SGDMonitor(verbose, num_epochs, num_cores,
                 start_time, isNumeric!S);
-        
+
         void _learn(U)(NeuralNet net, U d, ulong n_passes, uint core_id)
         {
             foreach(l; net.layers)
@@ -225,7 +225,7 @@ class SGDOptimizer : Optimizer {
 /**
  AdaGrad stochastic optimizer.
 
- See $(LINK2 http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf, 
+ See $(LINK2 http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf,
  Adaptive Subgradient Methods for Online Learning and Stochastic Optimization)
 Examples:
 -----------------
@@ -247,7 +247,7 @@ class AdaGrad : SGDOptimizer {
 
     // local variables
     float eps;
-    float[][] acc_grad; 
+    float[][] acc_grad;
 
     void delegate(NeuralLayer, float[]) _update;
 
@@ -483,7 +483,7 @@ class ADAM : SGDOptimizer {
         {
             foreach(p; priors)
                 p.accumulate_grad();
-            update_matrix();            
+            update_matrix();
             if(prox !is null)
                 prox.proxy_step();
             beta1 *= beta1_0;
diff --git a/src/vectorflow/regularizers.d b/src/vectorflow/regularizers.d
index f212073..f51c949 100644
--- a/src/vectorflow/regularizers.d
+++ b/src/vectorflow/regularizers.d
@@ -21,7 +21,7 @@ import vectorflow.utils;
 abstract class LinearPrior
 {
     /// pointer to the layer weight matrix
-    float[][] W; 
+    float[][] W;
     /// pointer to the layer gradient matrix
     float[][] grad;
     /// pointer to the optimizer for this layer
@@ -32,7 +32,7 @@ abstract class LinearPrior
         W = layer.W;
         grad = layer.grad;
         if(layer.optimizer is null)
-           return; 
+           return;
         if(auto o = cast(SGDOptimizer)(layer.optimizer))
             opt = o;
         else
@@ -156,7 +156,7 @@ class L2Prior : AdditiveLinearPrior
             foreach(k; 0..W.length)
                 l2op_scal(_lambda, W[k], W_prior[k], grad[k], _ind_start);
         }
-        
+
         void _acc_grad_vec()
         {
             foreach(k; 0..W.length)
diff --git a/src/vectorflow/serde.d b/src/vectorflow/serde.d
index 6ba1340..07f1ed5 100644
--- a/src/vectorflow/serde.d
+++ b/src/vectorflow/serde.d
@@ -94,7 +94,7 @@ class Serializer {
         {
             string layer_type;
             try{ layer_type = read!string(); }
-            catch(EOFException e){ break; } 
+            catch(EOFException e){ break; }
 
             auto l = cast(NeuralLayer)Object.factory(layer_type);
             l.deser(this);
diff --git a/src/vectorflow/utils.d b/src/vectorflow/utils.d
index df43f83..464de31 100644
--- a/src/vectorflow/utils.d
+++ b/src/vectorflow/utils.d
@@ -53,7 +53,7 @@ long to_long(T)(in T str) pure
 
 /**
 * Fast but unsafe function to parse a string into a float.
-* 
+*
 * If you trust your input, this is much faster than to!float.
 * Doesn't handle Inf numbers nor Nan, and doesn't throw exceptions.
 * Adapted from Phobos std.conv source code. See NOTICE for licence details.
@@ -223,9 +223,9 @@ final class Hasher {
         k1 *= c1;
         k1 = _rotl32(k1,15);
         k1 *= c2;
-        
+
         h1 ^= k1;
-        h1 = _rotl32(h1,13); 
+        h1 = _rotl32(h1,13);
         h1 = h1*5+0xe6546b64;
       }
 
diff --git a/test/backprop.d b/test/backprop.d
index 39ed829..81bf118 100644
--- a/test/backprop.d
+++ b/test/backprop.d
@@ -32,12 +32,12 @@ unittest {
     foreach(i; 0..l2.W.length)
         foreach(j; 0..l2.W[i].length)
             l2.W[i][j] = 2.0;
-    
+
 
     nn.predict([1.0f, 1.0f]);
     double tanh2 = (exp(4.0) - 1)/(exp(4.0) + 1);
     assert(fequal(nn.output[0], 40 * tanh2, 1e-5));
-    
+
     nn.backward_prop([1.0f]);
     assert(fequal(l2.grad[0][0], tanh2, 1e-5));
 
diff --git a/test/dataset.d b/test/dataset.d
index be13f1c..b23e1ec 100644
--- a/test/dataset.d
+++ b/test/dataset.d
@@ -32,7 +32,7 @@ unittest {
         }
 
         override void rewind(){_cnt = 0;}
-        
+
         override @property D save()
         {
             auto cp = new D();
@@ -56,7 +56,7 @@ unittest {
     assert(sum == 6);
 
     d.cache();
-    
+
     sum = 0;
     foreach(v; d)
         sum += v;
diff --git a/test/layers.d b/test/layers.d
index 7b1534e..5c6730e 100644
--- a/test/layers.d
+++ b/test/layers.d
@@ -126,7 +126,7 @@ unittest {
     assert(l.out_s.length == 4);
     assert(l.out_s[$-1].id == (123 ^ 456 ^ 789));
     assert(fequal(l.out_s[$-1].val, 1.3 * (-2.7) * 0.2));
-    
+
     // bag cf order 2: 3 hashes for bag 1
     nn.predict([SparseFG(123, 1.3f, 1), SparseFG(456, -2.7f, 2), SparseFG(789, 0.4f, 1), SparseFG(333, -9.1f, 1)]);
     assert(l.out_s.length == 7);
@@ -161,7 +161,7 @@ unittest {
         .stack(DenseData(3))
         .stack(l);
     nn.initialize(0.0);
-    
+
     // forward prop test
     auto x = [1.3f, -2.7f, 0.1f];
     auto preds = nn.predict(x);
diff --git a/test/learning.d b/test/learning.d
index 1893430..09ea447 100644
--- a/test/learning.d
+++ b/test/learning.d
@@ -42,7 +42,7 @@ unittest {
     auto dataS = fake_data_sparse(30, 83);
 
     foreach(net; nets(false))
-    { 
+    {
         auto calls = [
             delegate void() {net().learn(data, "logistic", AdaGrad(3, 0.1, 1));},
             delegate void() {net().learn(data, "logistic", AdaGrad(3, 0.1, 1), false, 2);},
@@ -54,7 +54,7 @@ unittest {
     }
 
     foreach(net; nets(true))
-    { 
+    {
         auto calls = [
             delegate void() {net().learn(dataS, "logistic", AdaGrad(3, 0.1, 25));},
             delegate void() {net().learn(dataS, "logistic", AdaGrad(2, 0.1, 1), false, 2);},
@@ -190,7 +190,7 @@ unittest{
 
 /// Multi roots learning
 unittest{
-    
+
     struct O1 {
         float label;
         float[] features;