diff --git a/urbansim/models/dcm.py b/urbansim/models/dcm.py
index b649ad1d..515f7983 100644
--- a/urbansim/models/dcm.py
+++ b/urbansim/models/dcm.py
@@ -234,13 +234,19 @@ class MNLDiscreteChoiceModel(DiscreteChoiceModel):
         Whether (and how much) to sample alternatives during prediction.
         Note that this can lead to multiple choosers picking the same
         alternative.
-    choice_column : optional
+    choice_column : str, optional
         Name of the column in the `alternatives` table that choosers
         should choose. e.g. the 'building_id' column. If not provided
         the alternatives index is used.
-    name : optional
+    name : str, optional
         Optional descriptive name for this model that may be used in
         output.
+    normalize : bool, optional, default False
+        Whether to subtract the mean and divide by the standard deviation
+        of each column before fitting the coefficients.
+    l1 : float, optional, default 0.0
+        Amount of L1 (Lasso) regularization used when fitting the coefficients.
+    l2 : float, optional, default 0.0
+        Amount of L2 (Ridge) regularization used when fitting the coefficients.
 
     """
     def __init__(
@@ -251,7 +257,8 @@ def __init__(
             interaction_predict_filters=None,
             estimation_sample_size=None,
             prediction_sample_size=None,
-            choice_column=None, name=None):
+            choice_column=None, name=None,
+            normalize=False, l1=0.0, l2=0.0):
         self._check_prob_choice_mode_compat(probability_mode, choice_mode)
         self._check_prob_mode_interaction_compat(
             probability_mode, interaction_predict_filters)
@@ -270,6 +277,9 @@ def __init__(
         self.choice_column = choice_column
         self.name = name if name is not None else 'MNLDiscreteChoiceModel'
         self.sim_pdf = None
+        self.normalize = normalize
+        self.l1 = l1
+        self.l2 = l2
 
         self.log_likelihoods = None
         self.fit_parameters = None
@@ -308,7 +318,10 @@ def from_yaml(cls, yaml_str=None, str_or_buffer=None):
             estimation_sample_size=cfg.get('estimation_sample_size', None),
             prediction_sample_size=cfg.get('prediction_sample_size', None),
             choice_column=cfg.get('choice_column', None),
-            name=cfg.get('name', None)
+            name=cfg.get('name', None),
+            normalize=cfg.get('normalize', False),
+            l1=cfg.get('l1', 0.0),
+            l2=cfg.get('l2', 0.0),
         )
 
         if cfg.get('log_likelihoods', None):
@@ -420,7 +433,8 @@ def fit(self, choosers, alternatives, current_choice):
                 'the input columns.')
 
         self.log_likelihoods, self.fit_parameters = mnl.mnl_estimate(
-            model_design.as_matrix(), chosen, self.sample_size)
+            model_design.as_matrix(), chosen, self.sample_size,
+            normalize=self.normalize, l1=self.l1, l2=self.l2)
         self.fit_parameters.index = model_design.columns
 
         logger.debug('finish: fit LCM model {}'.format(self.name))
@@ -534,6 +548,18 @@ def probabilities(self, choosers, alternatives, filter_tables=True):
         coeffs = [self.fit_parameters['Coefficient'][x]
                   for x in model_design.columns]
 
+        normalization_mean = 0.0
+        if 'Normalization Mean' in self.fit_parameters.columns:
+            normalization_mean = self.fit_parameters['Normalization Mean']
+            normalization_mean = [normalization_mean[x]
+                                  for x in model_design.columns]
+
+        normalization_std = 1.0
+        if 'Normalization Std' in self.fit_parameters.columns:
+            normalization_std = self.fit_parameters['Normalization Std']
+            normalization_std = [normalization_std[x]
+                                 for x in model_design.columns]
+
         # probabilities are returned from mnl_simulate as a 2d array
         # with choosers along rows and alternatives along columns
         if self.probability_mode == 'single_chooser':
@@ -544,7 +570,10 @@
 
         probabilities = mnl.mnl_simulate(
             model_design.as_matrix(), coeffs,
-            numalts=numalts, returnprobs=True)
+            numalts,
+            normalization_mean,
+            normalization_std,
+            returnprobs=True)
 
         # want to turn probabilities into a Series with a MultiIndex
         # of chooser IDs and alternative IDs.
@@ -680,7 +709,10 @@ def to_dict(self):
             'fitted': self.fitted,
             'log_likelihoods': self.log_likelihoods,
             'fit_parameters': (yamlio.frame_to_yaml_safe(self.fit_parameters)
-                               if self.fitted else None)
+                               if self.fitted else None),
+            'normalize': self.normalize,
+            'l1': self.l1,
+            'l2': self.l2,
         }
 
     def to_yaml(self, str_or_buffer=None):
diff --git a/urbansim/models/tests/test_dcm.py b/urbansim/models/tests/test_dcm.py
index fa60ba06..e72f3c6b 100644
--- a/urbansim/models/tests/test_dcm.py
+++ b/urbansim/models/tests/test_dcm.py
@@ -45,7 +45,7 @@ def alternatives():
 
 
 @pytest.fixture
-def basic_dcm():
+def simple_dcm():
     model_exp = 'var2 + var1:var3'
     sample_size = 5
     probability_mode = 'full_product'
@@ -71,6 +71,21 @@ def basic_dcm():
     return model
 
 
+@pytest.fixture
+def simple_dcm_fit(simple_dcm, choosers, alternatives):
+    simple_dcm.fit(choosers, alternatives, choosers.thing_id)
+    return simple_dcm
+
+
+@pytest.fixture(params=[(False, 0.0, 0.0), (True, 0.0, 0.0),
+                        (True, 1.0e-2, 0.0), (True, 0.0, 1.0e-2),
+                        (True, 1.0e-2, 1.0e-2)])
+def basic_dcm(request, simple_dcm):
+    model = simple_dcm
+    model.normalize, model.l1, model.l2 = request.param
+
+    return model
+
+
 @pytest.fixture
 def basic_dcm_fit(basic_dcm, choosers, alternatives):
     basic_dcm.fit(choosers, alternatives, choosers.thing_id)
@@ -144,7 +159,7 @@ def test_mnl_dcm(seed, basic_dcm, choosers, alternatives):
     # involved, but can at least do a smoke test.
     assert len(loglik) == 3
     assert len(basic_dcm.fit_parameters) == 2
-    assert len(basic_dcm.fit_parameters.columns) == 3
+    assert len(basic_dcm.fit_parameters.columns) == (5 if basic_dcm.normalize else 3)
 
     filtered_choosers, filtered_alts = basic_dcm.apply_predict_filters(
         choosers, alternatives)
@@ -160,10 +175,17 @@
 
     choices = basic_dcm.predict(choosers.iloc[1:], alternatives)
 
-    pdt.assert_series_equal(
-        choices,
-        pd.Series(
-            ['h', 'c', 'f'], index=pd.Index([1, 3, 4], name='chooser_id')))
+    if basic_dcm.normalize:
+        # normalizing should not change the model, but the normalized fit
+        # hits the coeffrange bounds, so the predicted choices differ
+        pdt.assert_series_equal(
+            choices,
+            pd.Series(
+                ['j', 'b', 'b'], index=pd.Index([1, 3, 4], name='chooser_id')))
+    else:
+        pdt.assert_series_equal(
+            choices,
+            pd.Series(
+                ['h', 'c', 'f'], index=pd.Index([1, 3, 4], name='chooser_id')))
 
     # check that we can do a YAML round-trip
     yaml_str = basic_dcm.to_yaml()
@@ -190,7 +212,7 @@ def test_mnl_dcm_repeated_alts(basic_dcm, choosers, alternatives):
     # involved, but can at least do a smoke test.
     assert len(loglik) == 3
     assert len(basic_dcm.fit_parameters) == 2
-    assert len(basic_dcm.fit_parameters.columns) == 3
+    assert len(basic_dcm.fit_parameters.columns) == (5 if basic_dcm.normalize else 3)
 
     repeated_index = alternatives.index.repeat([1, 2, 3, 2, 4, 3, 2, 1, 5, 8])
     repeated_alts = alternatives.loc[repeated_index].reset_index()
@@ -219,7 +241,10 @@ def test_mnl_dcm_yaml(basic_dcm, choosers, alternatives):
         'choice_column': basic_dcm.choice_column,
         'fitted': False,
         'log_likelihoods': None,
-        'fit_parameters': None
+        'fit_parameters': None,
+        'normalize': basic_dcm.normalize,
+        'l1': basic_dcm.l1,
+        'l2': basic_dcm.l2,
     }
 
     assert yaml.load(basic_dcm.to_yaml()) == expected_dict
@@ -243,13 +268,13 @@ def test_mnl_dcm_yaml(basic_dcm, choosers, alternatives):
     assert new_mod.fitted is True
 
 
-def test_mnl_dcm_prob_mode_single(seed, basic_dcm_fit, choosers, alternatives):
-    basic_dcm_fit.probability_mode = 'single_chooser'
+def test_mnl_dcm_prob_mode_single(seed, simple_dcm_fit, choosers, alternatives):
+    simple_dcm_fit.probability_mode = 'single_chooser'
 
-    filtered_choosers, filtered_alts = basic_dcm_fit.apply_predict_filters(
+    filtered_choosers, filtered_alts = simple_dcm_fit.apply_predict_filters(
         choosers, alternatives)
 
-    probs = basic_dcm_fit.probabilities(choosers.iloc[1:], alternatives)
+    probs = simple_dcm_fit.probabilities(choosers.iloc[1:], alternatives)
 
     pdt.assert_series_equal(
         probs,
@@ -267,21 +292,21 @@ def test_mnl_dcm_prob_mode_single(seed, basic_dcm_fit, choosers, alternatives):
                 [[1], filtered_alts.index.values],
                 names=['chooser_id', 'alternative_id'])))
 
-    sprobs = basic_dcm_fit.summed_probabilities(choosers, alternatives)
+    sprobs = simple_dcm_fit.summed_probabilities(choosers, alternatives)
     pdt.assert_index_equal(
         sprobs.index, filtered_alts.index, check_names=False)
     npt.assert_allclose(sprobs.sum(), len(filtered_choosers))
 
 
 def test_mnl_dcm_prob_mode_single_prediction_sample_size(
-        seed, basic_dcm_fit, choosers, alternatives):
-    basic_dcm_fit.probability_mode = 'single_chooser'
-    basic_dcm_fit.prediction_sample_size = 5
+        seed, simple_dcm_fit, choosers, alternatives):
+    simple_dcm_fit.probability_mode = 'single_chooser'
+    simple_dcm_fit.prediction_sample_size = 5
 
-    filtered_choosers, filtered_alts = basic_dcm_fit.apply_predict_filters(
+    filtered_choosers, filtered_alts = simple_dcm_fit.apply_predict_filters(
         choosers, alternatives)
 
-    probs = basic_dcm_fit.probabilities(choosers.iloc[1:], alternatives)
+    probs = simple_dcm_fit.probabilities(choosers.iloc[1:], alternatives)
 
     pdt.assert_series_equal(
         probs,
@@ -295,7 +320,7 @@ def test_mnl_dcm_prob_mode_single_prediction_sample_size(
                 [[1], ['g', 'j', 'f', 'd', 'a']],
                 names=['chooser_id', 'alternative_id'])))
 
-    sprobs = basic_dcm_fit.summed_probabilities(choosers, alternatives)
+    sprobs = simple_dcm_fit.summed_probabilities(choosers, alternatives)
     pdt.assert_index_equal(
         sprobs.index,
         pd.Index(['d', 'g', 'a', 'c', 'd'], name='alternative_id'))
@@ -320,14 +345,14 @@ def test_mnl_dcm_prob_mode_full_prediction_sample_size(
     npt.assert_allclose(sprobs.sum(), len(filtered_choosers))
 
 
-def test_mnl_dcm_choice_mode_agg(seed, basic_dcm_fit, choosers, alternatives):
-    basic_dcm_fit.probability_mode = 'single_chooser'
-    basic_dcm_fit.choice_mode = 'aggregate'
+def test_mnl_dcm_choice_mode_agg(seed, simple_dcm_fit, choosers, alternatives):
+    simple_dcm_fit.probability_mode = 'single_chooser'
+    simple_dcm_fit.choice_mode = 'aggregate'
 
-    filtered_choosers, filtered_alts = basic_dcm_fit.apply_predict_filters(
+    filtered_choosers, filtered_alts = simple_dcm_fit.apply_predict_filters(
         choosers, alternatives)
 
-    choices = basic_dcm_fit.predict(choosers, alternatives)
+    choices = simple_dcm_fit.predict(choosers, alternatives)
 
     pdt.assert_series_equal(
         choices,
@@ -512,14 +537,20 @@ def test_mnl_dcm_segmented_yaml(grouped_choosers, alternatives):
                 'name': 'x',
                 'fitted': False,
                 'log_likelihoods': None,
-                'fit_parameters': None
+                'fit_parameters': None,
+                'normalize': False,
+                'l1': 0.0,
+                'l2': 0.0,
             },
             'y': {
                 'name': 'y',
                 'model_expression': 'var3 + var1:var2',
                 'fitted': False,
                 'log_likelihoods': None,
-                'fit_parameters': None
+                'fit_parameters': None,
+                'normalize': False,
+                'l1': 0.0,
+                'l2': 0.0,
             }
         }
     }
diff --git a/urbansim/urbanchoice/mnl.py b/urbansim/urbanchoice/mnl.py
index 463eb252..8ba33866 100644
--- a/urbansim/urbanchoice/mnl.py
+++ b/urbansim/urbanchoice/mnl.py
@@ -67,7 +67,7 @@ def get_standard_error(hessian):
 
 
 def mnl_loglik(beta, data, chosen, numalts, weights=None, lcgrad=False,
-               stderr=0):
+               stderr=0, l1=0.0, l2=0.0):
     logger.debug('start: calculate MNL log-likelihood')
     numvars = beta.size
     numobs = data.size() // numvars // numalts
@@ -117,11 +117,18 @@ def mnl_loglik(beta, data, chosen, numalts, weights=None, lcgrad=False,
     loglik = loglik.get_mat()[0, 0]
     gradarr = np.reshape(gradarr.get_mat(), (1, gradarr.size()))[0]
 
+    # apply the L1 (Lasso) penalty and its subgradient
+    loglik -= l1 * np.abs(beta.get_mat()).sum()
+    gradarr -= l1 * np.sign(beta.get_mat())[0]
+
+    # apply the L2 (Ridge) penalty; its gradient is 2 * l2 * beta
+    loglik -= l2 * np.square(beta.get_mat()).sum()
+    gradarr -= 2.0 * l2 * beta.get_mat()[0]
+
     logger.debug('finish: calculate MNL log-likelihood')
     return -1 * loglik, -1 * gradarr
 
 
-def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
+def mnl_simulate(data, coeff, numalts, normalization_mean=0.0,
+                 normalization_std=1.0, GPU=False, returnprobs=True):
     """
     Get the probabilities for each chooser choosing between `numalts`
     alternatives.
@@ -136,6 +143,10 @@ def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
         The model coefficients corresponding to each column in `data`.
     numalts : int
         The number of alternatives available to each chooser.
+    normalization_mean : 1D array or float, optional
+        The mean subtracted from each column of `data` before applying `coeff`.
+    normalization_std : 1D array or float, optional
+        The standard deviation each column of `data` is divided by.
     GPU : bool, optional
     returnprobs : bool, optional
         If True, return the probabilities for each chooser/alternative instead
@@ -153,6 +164,7 @@ def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
             len(data), numalts))
     atype = 'numpy' if not GPU else 'cuda'
 
+    data = (data.copy() - normalization_mean) / normalization_std
     data = np.transpose(data)
     coeff = np.reshape(np.array(coeff), (1, len(coeff)))
@@ -176,7 +188,7 @@ def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
 
 
 def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
-                 weights=None, lcgrad=False, beta=None):
+                 weights=None, lcgrad=False, beta=None,
+                 normalize=False, l1=0.0, l2=0.0):
     """
     Calculate coefficients of the MNL model.
 
@@ -200,6 +212,8 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
     lcgrad : bool, optional
     beta : 1D array, optional
         Any initial guess for the coefficients.
+    normalize : bool, optional, default False
+        Whether to subtract the mean and divide by the standard deviation
+        of each column before fitting the coefficients.
+    l1 : float, optional, default 0.0
+        Amount of L1 (Lasso) regularization used when fitting the coefficients.
+    l2 : float, optional, default 0.0
+        Amount of L2 (Ridge) regularization used when fitting the coefficients.
 
     Returns
     -------
@@ -224,6 +238,12 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
     numvars = data.shape[1]
     numobs = data.shape[0] // numalts
 
+    if normalize:
+        normalization_mean = data.mean(0)
+        normalization_std = data.std(0, ddof=1)
+
+        data = (data.copy() - normalization_mean) / normalization_std
+
     if chosen is None:
         chosen = np.ones((numobs, numalts))  # used for latent classes
 
@@ -239,7 +259,7 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
     bounds = [coeffrange] * numvars
 
     with log_start_finish('scipy optimization for MNL fit', logger):
-        args = (data, chosen, numalts, weights, lcgrad)
+        args = (data, chosen, numalts, weights, lcgrad, 0, l1, l2)
         bfgs_result = scipy.optimize.fmin_l_bfgs_b(mnl_loglik,
                                                    beta,
                                                    args=args,
@@ -271,5 +291,9 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
                                    'Std. Error': stderr,
                                    'T-Score': beta / stderr})
 
+    if normalize:
+        fit_parameters['Normalization Mean'] = normalization_mean
+        fit_parameters['Normalization Std'] = normalization_std
+
     logger.debug('finish: MNL fit')
     return log_likelihood, fit_parameters
diff --git a/urbansim/urbanchoice/tests/test_mnl.py b/urbansim/urbanchoice/tests/test_mnl.py
index a939e462..50e6869d 100644
--- a/urbansim/urbanchoice/tests/test_mnl.py
+++ b/urbansim/urbanchoice/tests/test_mnl.py
@@ -111,30 +111,54 @@ def choosers_dm(choosers, test_data):
 @pytest.fixture
 def fit_coeffs(dm, chosen, num_alts):
     log_like, fit = mnl.mnl_estimate(dm.as_matrix(), chosen, num_alts)
-    return fit.Coefficient.values
+    return fit
 
 
-def test_mnl_estimate(dm, chosen, num_alts, test_data):
-    log_like, fit = mnl.mnl_estimate(dm.as_matrix(), chosen, num_alts)
-    result = pd.Series(fit.Coefficient.values, index=dm.columns)
+@pytest.fixture
+def fit_normalize(dm, chosen, num_alts):
+    log_like, fit = mnl.mnl_estimate(dm.as_matrix(), chosen, num_alts,
+                                     normalize=True)
+    return fit
+
+
+def test_mnl_estimate(fit_coeffs, fit_normalize, dm, chosen, num_alts, test_data):
+    result = pd.Series(fit_coeffs.Coefficient.values, index=dm.columns)
     result, expected = result.align(test_data['est_expected'])
     npt.assert_allclose(result.values, expected.values, rtol=1e-4)
 
+    l1 = abs(fit_normalize.Coefficient).sum()
+    _, fit = mnl.mnl_estimate(dm.as_matrix(), chosen, num_alts,
+                              normalize=True, l1=0.1)
+    fit_l1 = abs(fit.Coefficient).sum()
+    assert fit_l1 < l1, "the l1 penalty should shrink the coefficients' l1 norm"
+
+    l2 = np.square(fit_normalize.Coefficient).sum()
+    _, fit = mnl.mnl_estimate(dm.as_matrix(), chosen, num_alts,
+                              normalize=True, l2=0.2)
+    fit_l2 = np.square(fit.Coefficient).sum()
+    assert fit_l2 < l2, "the l2 penalty should shrink the coefficients' l2 norm"
+
 
-def test_mnl_simulate(dm, fit_coeffs, num_alts, test_data, choosers_dm):
+def test_mnl_simulate(dm, fit_coeffs, fit_normalize, num_alts, test_data, choosers_dm):
     # check that if all the alternatives have the same numbers
     # we get an even probability distribution
     data = np.array(
         [[10 ** (x + 1) for x in range(len(dm.columns))]] * num_alts)
 
     probs = mnl.mnl_simulate(
-        data, fit_coeffs, num_alts, returnprobs=True)
+        data, fit_coeffs.Coefficient.values, num_alts, returnprobs=True)
     npt.assert_allclose(probs, [[1 / num_alts] * num_alts])
 
     # now test with real data
     probs = mnl.mnl_simulate(
-        choosers_dm.as_matrix(), fit_coeffs, num_alts, returnprobs=True)
+        choosers_dm.as_matrix(), fit_coeffs.Coefficient.values, num_alts,
+        returnprobs=True)
+    results = pd.DataFrame(probs, columns=test_data['sim_expected'].columns)
+    results, expected = results.align(test_data['sim_expected'])
+    npt.assert_allclose(results.as_matrix(), expected.as_matrix(), rtol=1e-4)
+
+    # now test with real data and a normalized fit
+    probs = mnl.mnl_simulate(
+        choosers_dm.as_matrix(), fit_normalize.Coefficient.values, num_alts,
+        returnprobs=True,
+        normalization_mean=fit_normalize['Normalization Mean'].values,
+        normalization_std=fit_normalize['Normalization Std'].values)
     results = pd.DataFrame(probs, columns=test_data['sim_expected'].columns)
     results, expected = results.align(test_data['sim_expected'])
     npt.assert_allclose(results.as_matrix(), expected.as_matrix(), rtol=1e-4)
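
Reviewer note (not part of the patch): with the penalty terms added to `mnl_loglik` above, `scipy.optimize.fmin_l_bfgs_b` now optimizes a penalized log-likelihood rather than the raw one. Writing beta for the coefficient vector and l1, l2 for the penalty weights, the objective and gradient implied by the code are, in LaTeX:

    \ell_{\mathrm{pen}}(\beta) = \ell(\beta) - l_1 \lVert\beta\rVert_1 - l_2 \lVert\beta\rVert_2^2,
    \qquad
    \nabla \ell_{\mathrm{pen}}(\beta) = \nabla \ell(\beta) - l_1 \operatorname{sign}(\beta) - 2\, l_2\, \beta

`mnl_loglik` returns the negated value and gradient because `fmin_l_bfgs_b` minimizes, and `sign(beta)` serves as a subgradient of the L1 term at zero.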
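
Reviewer note (not part of the patch): a minimal sketch of exercising the new keyword arguments directly through the `urbanchoice` layer. The array sizes, random data, and penalty values below are illustrative only; the calls themselves (`mnl_estimate(..., normalize=, l1=, l2=)`, the `Normalization Mean` / `Normalization Std` columns of `fit_parameters`, and `mnl_simulate(..., normalization_mean=, normalization_std=)`) are the ones introduced in this diff.

    import numpy as np

    from urbansim.urbanchoice import mnl

    # illustrative sizes: 50 choosers, 4 alternatives each, 3 variables
    numobs, numalts, numvars = 50, 4, 3
    rng = np.random.RandomState(0)

    # design matrix: one row per chooser/alternative pair, one column per variable
    data = rng.normal(size=(numobs * numalts, numvars))

    # one-hot matrix marking the alternative each chooser picked
    chosen = np.zeros((numobs, numalts))
    chosen[np.arange(numobs), rng.randint(numalts, size=numobs)] = 1

    # fit with normalization and a small amount of both penalties
    log_like, fit = mnl.mnl_estimate(
        data, chosen, numalts, normalize=True, l1=1e-2, l2=1e-2)

    # the normalization constants ride along in fit_parameters, so simulation
    # can apply the same shift and scale that was used during estimation
    probs = mnl.mnl_simulate(
        data, fit.Coefficient.values, numalts,
        normalization_mean=fit['Normalization Mean'].values,
        normalization_std=fit['Normalization Std'].values,
        returnprobs=True)

    print(probs.shape)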