Normalize data for DCM estimation and simulation

UDST · Mar 30, 2018 · b36288b
1 parent 2497039
commit b36288b
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 2 deletions.
diff --git a/urbansim/models/dcm.py b/urbansim/models/dcm.py
@@ -534,6 +534,11 @@ def probabilities(self, choosers, alternatives, filter_tables=True):
         coeffs = [self.fit_parameters['Coefficient'][x]
                   for x in model_design.columns]
 
+        normalization_mean = [self.fit_parameters['Normalization Mean'][x]
+                  for x in model_design.columns]
+        normalization_std = [self.fit_parameters['Normalization Std'][x]
+                  for x in model_design.columns]
+
         # probabilities are returned from mnl_simulate as a 2d array
         # with choosers along rows and alternatives along columns
         if self.probability_mode == 'single_chooser':
@@ -544,7 +549,10 @@ def probabilities(self, choosers, alternatives, filter_tables=True):
         probabilities = mnl.mnl_simulate(
             model_design.as_matrix(),
             coeffs,
-            numalts=numalts, returnprobs=True)
+            normalization_mean,
+            normalization_std,
+            numalts=numalts,
+            returnprobs=True)
 
         # want to turn probabilities into a Series with a MultiIndex
         # of chooser IDs and alternative IDs.

diff --git a/urbansim/urbanchoice/mnl.py b/urbansim/urbanchoice/mnl.py
@@ -118,7 +118,7 @@ def mnl_loglik(beta, data, chosen, numalts, weights=None, lcgrad=False,
     return -1 * loglik, -1 * gradarr
 
 
-def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
+def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GPU=False, returnprobs=True):
     """
     Get the probabilities for each chooser choosing between `numalts`
     alternatives.
@@ -131,6 +131,10 @@ def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
         choosers. Alternatives must be in the same order for each chooser.
     coeff : 1D array
         The model coefficients corresponding to each column in `data`.
+    normalization_mean : 1D array
+        Per-column mean subtracted from `data` before coefficients are applied.
+    normalization_std : 1D array
+        Per-column standard deviation the centered `data` is divided by.
     numalts : int
         The number of alternatives available to each chooser.
     GPU : bool, optional
@@ -150,6 +154,7 @@ def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
             len(data), numalts))
     atype = 'numpy' if not GPU else 'cuda'
 
+    data = (data.copy() - normalization_mean) / normalization_std
     data = np.transpose(data)
     coeff = np.reshape(np.array(coeff), (1, len(coeff)))
 
@@ -221,6 +226,11 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
     numvars = data.shape[1]
     numobs = data.shape[0] // numalts
 
+    normalization_mean = data.mean(0)
+    normalization_std = data.std(0, ddof=1)
+
+    data = (data.copy() - normalization_mean) / normalization_std
+
     if chosen is None:
         chosen = np.ones((numobs, numalts))  # used for latent classes
 
@@ -260,6 +270,8 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
     }
 
     fit_parameters = pd.DataFrame({
+        'Normalization Mean': normalization_mean,
+        'Normalization Std': normalization_std,
         'Coefficient': beta,
         'Std. Error': stderr,
         'T-Score': beta / stderr})