diff --git a/SpatialDivisiveNormalization.lua b/SpatialDivisiveNormalization.lua
index 92dfac7e2..8395e231b 100644
--- a/SpatialDivisiveNormalization.lua
+++ b/SpatialDivisiveNormalization.lua
@@ -34,7 +34,7 @@ function SpatialDivisiveNormalization:__init(nInputPlane, kernel, threshold, thr
       self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1))
       self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1)))
    end
-   self.meanestimator:add(nn.Replicate(self.nInputPlane))
+   self.meanestimator:add(nn.Replicate(self.nInputPlane,1,3))

   -- create convolutional std estimator
   self.stdestimator = nn.Sequential()
@@ -46,7 +46,7 @@ function SpatialDivisiveNormalization:__init(nInputPlane, kernel, threshold, thr
       self.stdestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1))
       self.stdestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1)))
    end
-   self.stdestimator:add(nn.Replicate(self.nInputPlane))
+   self.stdestimator:add(nn.Replicate(self.nInputPlane,1,3))
    self.stdestimator:add(nn.Sqrt())

   -- set kernel and bias
@@ -82,15 +82,30 @@ function SpatialDivisiveNormalization:__init(nInputPlane, kernel, threshold, thr
 end

 function SpatialDivisiveNormalization:updateOutput(input)
+
+   self.localstds = self.stdestimator:updateOutput(input)
+
    -- compute side coefficients
-   if (input:size(3) ~= self.coef:size(3)) or (input:size(2) ~= self.coef:size(2)) then
-      local ones = input.new():resizeAs(input):fill(1)
-      self.coef = self.meanestimator:updateOutput(ones)
-      self.coef = self.coef:clone()
+   local dim = input:dim()
+   if self.localstds:dim() ~= self.coef:dim() or (input:size(dim) ~= self.coef:size(dim)) or (input:size(dim-1) ~= self.coef:size(dim-1)) then
+      self.ones = self.ones or input.new()
+      if dim == 4 then
+         -- batch mode
+         self.ones:resizeAs(input[1]):fill(1)
+         local coef = self.meanestimator:updateOutput(self.ones)
+         self._coef = self._coef or input.new()
+         self._coef:resizeAs(coef):copy(coef) -- make contiguous for view
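+         -- the coefficients depend only on the frame size, so view the single-frame
+         -- result as 1 x nInputPlane x h x w and expand it along the batch
+         -- dimension; expand uses a zero stride, so no memory is copied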
+         self.coef = self._coef:view(1,table.unpack(self._coef:size():totable())):expandAs(self.localstds)
+      else
+         self.ones:resizeAs(input):fill(1)
+         self.coef = self.meanestimator:updateOutput(self.ones)
+      end
+
    end

   -- normalize std dev
-   self.localstds = self.stdestimator:updateOutput(input)
    self.adjustedstds = self.divider:updateOutput{self.localstds, self.coef}
    self.thresholdedstds = self.thresholder:updateOutput(self.adjustedstds)
    self.output = self.normalizer:updateOutput{input, self.thresholdedstds}
diff --git a/SpatialSubtractiveNormalization.lua b/SpatialSubtractiveNormalization.lua
index 51bf23a4f..84d943ae9 100644
--- a/SpatialSubtractiveNormalization.lua
+++ b/SpatialSubtractiveNormalization.lua
@@ -35,7 +35,7 @@ function SpatialSubtractiveNormalization:__init(nInputPlane, kernel)
       self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1))
       self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1)))
    end
-   self.meanestimator:add(nn.Replicate(self.nInputPlane))
+   self.meanestimator:add(nn.Replicate(self.nInputPlane,1,3))

   -- set kernel and bias
   if kdim == 2 then
@@ -60,12 +60,29 @@ function SpatialSubtractiveNormalization:__init(nInputPlane, kernel)
    self.coef = torch.Tensor(1,1,1)
 end

 function SpatialSubtractiveNormalization:updateOutput(input)
    -- compute side coefficients
-   if (input:size(3) ~= self.coef:size(3)) or (input:size(2) ~= self.coef:size(2)) then
-      local ones = input.new():resizeAs(input):fill(1)
-      self.coef = self.meanestimator:updateOutput(ones)
-      self.coef = self.coef:clone()
+   local dim = input:dim()
+   if input:dim()+1 ~= self.coef:dim() or (input:size(dim) ~= self.coef:size(dim)) or (input:size(dim-1) ~= self.coef:size(dim-1)) then
+      self.ones = self.ones or input.new()
+      self._coef = self._coef or self.coef.new()
+      if dim == 4 then
+         -- batch mode
+         self.ones:resizeAs(input[1]):fill(1)
+         local coef = self.meanestimator:updateOutput(self.ones)
+         self._coef:resizeAs(coef):copy(coef) -- make contiguous for view
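+         -- build the batched target size (batchSize x the single-frame size) and
+         -- expand the single-frame coefficients across the whole batch (no copy)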
+         local size = coef:size():totable()
+         table.insert(size,1,input:size(1))
+         self.coef = self._coef:view(1,table.unpack(self._coef:size():totable())):expand(table.unpack(size))
+      else
+         self.ones:resizeAs(input):fill(1)
+         local coef = self.meanestimator:updateOutput(self.ones)
+         self._coef:resizeAs(coef):copy(coef) -- copy meanestimator.output as it will be used below
+         self.coef = self._coef
+      end
+
    end

   -- compute mean
diff --git a/doc/simple.md b/doc/simple.md
index 7c97a6b8c..4eac36d38 100755
--- a/doc/simple.md
+++ b/doc/simple.md
@@ -511,10 +511,35 @@ Narrow is application of [narrow](https://github.com/torch/torch7/blob/master/do
 ## Replicate ##

 ```lua
-module = nn.Replicate(nFeature, dim)
+module = nn.Replicate(nFeature [, dim, ndim])
 ```

-This class creates an output where the input is replicated `nFeature` times along dimension `dim` (default 1). There is no memory allocation or memory copy in this module. It sets the [stride](https://github.com/torch/torch7/blob/master/doc/tensor.md#torch.Tensor.stride) along the `dim`th dimension to zero.
+This class creates an output where the input is replicated `nFeature` times along dimension `dim` (default 1).
+There is no memory allocation or memory copy in this module.
+It sets the [stride](https://github.com/torch/torch7/blob/master/doc/tensor.md#torch.Tensor.stride) along the `dim`th dimension to zero.
+When provided, `ndim` should specify the number of non-batch dimensions.
+This allows the module to replicate along the same non-batch dimension `dim` for both batch and non-batch inputs.
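+
+For example, with `ndim = 3`, the same module replicates a `3`-dimensional input
+along its first dimension and a `4`-dimensional (batched) input along its second
+dimension:
+
+```lua
+> m = nn.Replicate(2, 1, 3)
+> m:forward(torch.rand(3, 4, 5)):size() -- non-batch input
+ 2
+ 3
+ 4
+ 5
+[torch.LongStorage of size 4]
+> m:forward(torch.rand(6, 3, 4, 5)):size() -- batch of 6 inputs
+ 6
+ 2
+ 3
+ 4
+ 5
+[torch.LongStorage of size 5]
+```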

 ```lua
 > x = torch.linspace(1, 5, 5)
diff --git a/test.lua b/test.lua
index 5f408995b..e69d3c8ea 100644
--- a/test.lua
+++ b/test.lua
@@ -1153,7 +1153,7 @@ function nntest.SpatialSubtractiveNormalization_2dkernel()
    local nbfeatures = math.random(3,5)
    local kernel = torch.Tensor(kersize,kersize):fill(1)
    local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel)
-   local input = torch.rand(nbfeatures,inputSize,inputSize)
+   local input = torch.rand(nbfeatures,inputSize,inputSize/2)

    local err = jac.testJacobian(module,input)
    mytester:assertlt(err,precision, 'error on state ')
@@ -1161,6 +1161,32 @@ function nntest.SpatialSubtractiveNormalization_2dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+   -- test batch mode
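+   -- (same pattern in the batch sections below: embed the single-sample input
+   -- as sample 2 of a batch; that sample's results must match the run above)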
+   local output = module:forward(input):clone()
+   local gradOutput = output:clone():uniform(0,1)
+   local gradInput = module:backward(input, gradOutput):clone()
+   local batchSize = 4
+   local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+   input2[2]:copy(input)
+
+   local output2 = module:forward(input2)
+   local gradOutput2 = output2:clone():uniform(0,1)
+   gradOutput2[2]:copy(gradOutput)
+   local gradInput2 = module:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubtractiveNormalization 2d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialSubtractiveNormalization 2d backward batch err")
+
+   local err = jac.testJacobian(module,input2)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input2)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
 end

 function nntest.SpatialSubtractiveNormalization_1dkernel()
@@ -1169,7 +1195,7 @@ function nntest.SpatialSubtractiveNormalization_1dkernel()
    local nbfeatures = math.random(3,5)
    local kernel = torch.Tensor(kersize):fill(1)
    local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel)
-   local input = torch.rand(nbfeatures,inputSize,inputSize)
+   local input = torch.rand(nbfeatures,inputSize,inputSize/2)

    local err = jac.testJacobian(module,input)
    mytester:assertlt(err,precision, 'error on state ')
@@ -1177,6 +1203,29 @@ function nntest.SpatialSubtractiveNormalization_1dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+   -- test batch mode
+   local output = module:forward(input):clone()
+   local gradOutput = output:clone():uniform(0,1)
+   local gradInput = module:backward(input, gradOutput):clone()
+   local batchSize = 4
+   local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+   input2[2]:copy(input)
+
+   local output2 = module:forward(input2)
+   local gradOutput2 = output2:clone():uniform(0,1)
+   gradOutput2[2]:copy(gradOutput)
+   local gradInput2 = module:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubtractiveNormalization 1d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialSubtractiveNormalization 1d backward batch err")
+
+   local err = jac.testJacobian(module,input2)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input2)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
 end

 function nntest.SpatialDivisiveNormalization_2dkernel()
@@ -1185,7 +1234,7 @@ function nntest.SpatialDivisiveNormalization_2dkernel()
    local nbfeatures = math.random(3,5)
    local kernel = torch.Tensor(kersize,kersize):fill(1)
    local module = nn.SpatialDivisiveNormalization(nbfeatures,kernel)
-   local input = torch.rand(nbfeatures,inputSize,inputSize)
+   local input = torch.rand(nbfeatures,inputSize,inputSize/2)

    local err = jac.testJacobian(module,input)
    mytester:assertlt(err,precision, 'error on state ')
@@ -1193,6 +1242,29 @@ function nntest.SpatialDivisiveNormalization_2dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+   -- test batch mode
+   local output = module:forward(input):clone()
+   local gradOutput = output:clone():uniform(0,1)
+   local gradInput = module:backward(input, gradOutput):clone()
+   local batchSize = 4
+   local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+   input2[2]:copy(input)
+
+   local output2 = module:forward(input2)
+   local gradOutput2 = output2:clone():uniform(0,1)
+   gradOutput2[2]:copy(gradOutput)
+   local gradInput2 = module:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 2d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialDivisiveNormalization 2d backward batch err")
+
+   local err = jac.testJacobian(module,input2)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input2)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
 end

 function nntest.SpatialDivisiveNormalization_1dkernel()
@@ -1201,7 +1273,7 @@ function nntest.SpatialDivisiveNormalization_1dkernel()
    local nbfeatures = math.random(3,5)
    local kernel = torch.Tensor(kersize):fill(1)
    local module = nn.SpatialDivisiveNormalization(nbfeatures,kernel)
-   local input = torch.rand(nbfeatures,inputSize,inputSize)
+   local input = torch.rand(nbfeatures,inputSize,inputSize/2)

    local err = jac.testJacobian(module,input)
    mytester:assertlt(err,precision, 'error on state ')
@@ -1209,6 +1281,71 @@ function nntest.SpatialDivisiveNormalization_1dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+   -- test batch mode
+   local output = module:forward(input):clone()
+   local gradOutput = output:clone():uniform(0,1)
+   local gradInput = module:backward(input, gradOutput):clone()
+   local batchSize = 4
+   local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+   input2[2]:copy(input)
+
+   local output2 = module:forward(input2)
+   local gradOutput2 = output2:clone():uniform(0,1)
+   gradOutput2[2]:copy(gradOutput)
+   local gradInput2 = module:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 1d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialDivisiveNormalization 1d backward batch err")
+
+   local err = jac.testJacobian(module,input2)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input2)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialContrastiveNormalization()
+   local inputSize = math.random(6,9)
+   local kersize = 3
+   local nbfeatures = math.random(3,5)
+   local kernel = torch.Tensor(kersize,kersize):fill(1)
+   local module = nn.SpatialContrastiveNormalization(nbfeatures,kernel)
+   local input = torch.rand(nbfeatures,inputSize,inputSize/2)
+
+   local err = jac.testJacobian(module,input)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+   -- test batch mode and type
+   local output = module:forward(input):clone()
+   local gradOutput = output:clone():uniform(0,1)
+   local gradInput = module:backward(input, gradOutput):clone()
+   local batchSize = 4
+   local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2):float()
+   input2[2]:copy(input)
+
+   module:float() -- type-cast
+   local output2 = module:forward(input2)
+   local gradOutput2 = output2:clone():uniform(0,1)
+   gradOutput2[2]:copy(gradOutput)
+   local gradInput2 = module:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(output2[2], output:float(), 0.000001, "SpatialContrastiveNormalization 2d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput:float(), 0.000001, "SpatialContrastiveNormalization 2d backward batch err")
+
+   module:double()
+   input2 = input2:double()
+   local err = jac.testJacobian(module,input2)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input2)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
 end

 function nntest.SpatialConvolution()