Skip to content

Commit

Permalink
normalize data for dcm
Browse files Browse the repository at this point in the history
  • Loading branch information
Eh2406 committed Mar 30, 2018
1 parent 2497039 commit b36288b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
10 changes: 9 additions & 1 deletion urbansim/models/dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,11 @@ def probabilities(self, choosers, alternatives, filter_tables=True):
coeffs = [self.fit_parameters['Coefficient'][x]
for x in model_design.columns]

normalization_mean = [self.fit_parameters['Normalization Mean'][x]
for x in model_design.columns]
normalization_std = [self.fit_parameters['Normalization Std'][x]
for x in model_design.columns]

# probabilities are returned from mnl_simulate as a 2d array
# with choosers along rows and alternatives along columns
if self.probability_mode == 'single_chooser':
Expand All @@ -544,7 +549,10 @@ def probabilities(self, choosers, alternatives, filter_tables=True):
probabilities = mnl.mnl_simulate(
model_design.as_matrix(),
coeffs,
numalts=numalts, returnprobs=True)
normalization_mean,
normalization_std,
numalts=numalts,
returnprobs=True)

# want to turn probabilities into a Series with a MultiIndex
# of chooser IDs and alternative IDs.
Expand Down
14 changes: 13 additions & 1 deletion urbansim/urbanchoice/mnl.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def mnl_loglik(beta, data, chosen, numalts, weights=None, lcgrad=False,
return -1 * loglik, -1 * gradarr


def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GPU=False, returnprobs=True):
"""
Get the probabilities for each chooser choosing between `numalts`
alternatives.
Expand All @@ -131,6 +131,10 @@ def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
choosers. Alternatives must be in the same order for each chooser.
coeff : 1D array
The model coefficients corresponding to each column in `data`.
normalization_mean : 1D array
The value subtracted from each column in `data` before simulation.
normalization_std : 1D array
The value each mean-centered column in `data` is divided by.
numalts : int
The number of alternatives available to each chooser.
GPU : bool, optional
Expand All @@ -150,6 +154,7 @@ def mnl_simulate(data, coeff, numalts, GPU=False, returnprobs=True):
len(data), numalts))
atype = 'numpy' if not GPU else 'cuda'

data = (data.copy() - normalization_mean) / normalization_std
data = np.transpose(data)
coeff = np.reshape(np.array(coeff), (1, len(coeff)))

Expand Down Expand Up @@ -221,6 +226,11 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
numvars = data.shape[1]
numobs = data.shape[0] // numalts

normalization_mean = data.mean(0)
normalization_std = data.std(0, ddof=1)

data = (data.copy() - normalization_mean) / normalization_std

if chosen is None:
chosen = np.ones((numobs, numalts)) # used for latent classes

Expand Down Expand Up @@ -260,6 +270,8 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
}

fit_parameters = pd.DataFrame({
'Normalization Mean': normalization_mean,
'Normalization Std': normalization_std,
'Coefficient': beta,
'Std. Error': stderr,
'T-Score': beta / stderr})
Expand Down

0 comments on commit b36288b

Please sign in to comment.