fixed typing problems + other cleanup
NicoNeureiter committed Jul 19, 2022
1 parent 3cefe0d commit a3373ac
Showing 8 changed files with 113 additions and 105 deletions.
1 change: 1 addition & 0 deletions requirements.txt
@@ -12,3 +12,4 @@ setuptools
cartopy<0.20.0
typing_extensions
tables
+pydantic
2 changes: 1 addition & 1 deletion sbayes/config/config.py
@@ -8,7 +8,7 @@
import ruamel.yaml

from pydantic import BaseModel, Extra, Field
-from pydantic import validator, root_validator, ValidationError
+from pydantic import root_validator, ValidationError
from pydantic import FilePath, DirectoryPath
from pydantic import PositiveInt, PositiveFloat, confloat, NonNegativeFloat, NonNegativeInt

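For context, these imports support pydantic-based validation of the config files (pydantic is also the new entry in requirements.txt above). A minimal sketch of that pattern under pydantic v1; the model name, fields, and cross-field rule are hypothetical, not the actual sBayes schema:

from pydantic import BaseModel, PositiveInt, root_validator

class MCMCConfigSketch(BaseModel):
    # Hypothetical fields, for illustration only.
    steps: PositiveInt = 1000
    samples: PositiveInt = 100

    @root_validator(pre=True)
    def check_consistency(cls, values):
        # Cross-field check; the rule itself is made up for the sketch.
        if int(values.get("steps", 1000)) < int(values.get("samples", 100)):
            raise ValueError("steps must be at least the number of samples")
        return values

MCMCConfigSketch(steps=5000, samples=500)  # validates on construction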
30 changes: 18 additions & 12 deletions sbayes/model/model_fast.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
from __future__ import annotations

-from typing import List, Dict, Sequence, Callable, Optional, Any
+from typing import List, Dict, Sequence, Callable, Optional
from enum import Enum
from dataclasses import dataclass

@@ -16,8 +16,7 @@
from sbayes.util import (compute_delaunay, n_smallest_distances, log_multinom,
dirichlet_logpdf, log_expit)
from sbayes.config.config import ModelConfig, PriorConfig, DirichletPriorConfig

-EPS = np.finfo(float).eps
+from sbayes.load_data import Data, ComputeNetwork


@dataclass
@@ -49,7 +48,7 @@ class Model:
prior (Prior): The prior of the model
"""
-def __init__(self, data: 'Data', config: ModelConfig):
+def __init__(self, data: Data, config: ModelConfig):
self.data = data
self.config = config
self.confounders = config.confounders
@@ -106,7 +105,7 @@ class Likelihood(object):
shape: (n_objects, n_features)
"""

-def __init__(self, data: 'Data', shapes: ModelShapes):
+def __init__(self, data: Data, shapes: ModelShapes):
self.features = data.features.values
self.confounders = data.confounders
self.shapes = shapes
@@ -370,7 +369,7 @@ class Prior:
prior_confounding_effects (ConfoundingEffectsPrior): prior on all confounding effects
"""

-def __init__(self, shapes: ModelShapes, config: PriorConfig, data: 'Data'):
+def __init__(self, shapes: ModelShapes, config: PriorConfig, data: Data):
self.shapes = shapes
self.config = config
self.data = data
@@ -437,16 +436,20 @@ class TYPES(Enum):
UNIFORM = 'uniform'
DIRICHLET = 'dirichlet'

-def __init__(self, config, shapes, conf=None, initial_counts=1.):
+config: DirichletPriorConfig | dict[str, DirichletPriorConfig]
+shapes: ModelShapes
+initial_counts: float
+prior_type: Optional[TYPES]
+counts: Optional[NDArray[float]]
+concentration: Optional[list[NDArray[float]]]
+
+def __init__(self, config, shapes, initial_counts=1.):
self.config = config
self.shapes = shapes
-self.conf = conf

self.initial_counts = initial_counts
self.prior_type = None
self.counts = None
-self.concentration: list[NDArray] = None
+self.concentration = None

self.parse_attributes()
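The class-level annotations added above declare attributes that start out as None and are only filled in by parse_attributes(). A small sketch of this declare-then-populate pattern; the class and field below are invented for illustration, not sBayes code:

from typing import Optional

import numpy as np
from numpy.typing import NDArray

class PriorSketch:
    # Declared up front so type checkers know the attribute exists...
    counts: Optional[NDArray[np.float64]]

    def __init__(self) -> None:
        self.counts = None  # ...but it is only populated later

    def parse_attributes(self) -> None:
        self.counts = np.zeros(3)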

@@ -501,9 +504,12 @@ def invalid_prior_message(self, s):

class ConfoundingEffectsPrior(DirichletPrior):

+conf: str

def __init__(self, config, shapes, conf, initial_counts=1.):
-super(ConfoundingEffectsPrior, self).__init__(config, shapes, conf=conf,
+super(ConfoundingEffectsPrior, self).__init__(config, shapes,
initial_counts=initial_counts)
+self.conf = conf

def parse_attributes(self):
n_groups = len(self.config)
@@ -890,7 +896,7 @@ def get_setup_message(self):

def compute_gaussian_geo_prior(
cluster: np.array,
-network: 'ComputeNetwork',
+network: ComputeNetwork,
cov: np.array,
) -> float:
"""This function computes the 2D Gaussian geo-prior for all edges in the cluster.
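A recurring change in this file replaces quoted forward references such as 'Data' with real imports. With `from __future__ import annotations` at the top of the module, annotations are not evaluated at runtime, so the unquoted form is safe even where eager imports would be circular. A minimal self-contained sketch (the classes are invented for illustration):

from __future__ import annotations

from dataclasses import dataclass

@dataclass
class Data:
    values: list[float]

class Model:
    # `Data` can be written unquoted; the annotation stays a string at runtime,
    # so in larger codebases the import may even sit under typing.TYPE_CHECKING.
    def __init__(self, data: Data):
        self.data = data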
25 changes: 13 additions & 12 deletions sbayes/model/model_slow.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
from __future__ import annotations

-from typing import List, Dict, Sequence, Callable, Optional, Any
+from typing import List, Dict, Sequence, Callable, Optional
from enum import Enum
from dataclasses import dataclass

@@ -15,9 +15,8 @@
from sbayes.sampling.state import Sample
from sbayes.util import (compute_delaunay, n_smallest_distances, log_multinom,
dirichlet_logpdf, log_expit)
-from sbayes.config.config import ModelConfig, PriorConfig, DirichletPriorConfig
-
-EPS = np.finfo(float).eps
+from sbayes.config.config import ModelConfig, PriorConfig
+from sbayes.load_data import Data, ComputeNetwork


@dataclass
@@ -49,7 +48,7 @@ class Model:
prior (Prior): The prior of the model
"""
-def __init__(self, data: 'Data', config: ModelConfig):
+def __init__(self, data: Data, config: ModelConfig):
self.data = data
self.config = config
self.confounders = config.confounders
@@ -106,7 +105,7 @@ class Likelihood(object):
shape: (n_objects, n_features)
"""

-def __init__(self, data: 'Data', shapes: ModelShapes):
+def __init__(self, data: Data, shapes: ModelShapes):
self.features = data.features.values
self.confounders = data.confounders
self.shapes = shapes
@@ -215,7 +214,7 @@ def compute_component_likelihood(
def update_weights(sample: Sample) -> NDArray[float]:
return normalize_weights(
weights=sample.weights.value,
-has_components=compute_has_components(sample.clusters,sample.confounders)
+has_components=compute_has_components(sample.clusters, sample.confounders)
)
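update_weights above renormalizes the mixture weights per object, depending on which components (the cluster effect and each confounder) actually apply to that object. A rough sketch of that idea; this is a hypothetical re-implementation, not the sBayes source, with shapes taken from the docstrings elsewhere in this diff:

import numpy as np

def normalize_weights_sketch(weights, has_components):
    """weights: (n_features, n_components) mixture weights per feature.
    has_components: (n_objects, n_components) booleans marking which
    components apply to each object (assumed: at least one per object).
    Returns (n_objects, n_features, n_components), renormalized per object."""
    w = weights[None, :, :] * has_components[:, None, :]
    return w / w.sum(axis=-1, keepdims=True)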


@@ -251,7 +250,7 @@ class Prior:
prior_confounding_effects (ConfoundingEffectsPrior): prior on all confounding effects
"""

-def __init__(self, shapes: ModelShapes, config: PriorConfig, data: 'Data'):
+def __init__(self, shapes: ModelShapes, config: PriorConfig, data: Data):
self.shapes = shapes
self.config = config
self.data = data
@@ -309,11 +308,10 @@ class TYPES(Enum):
UNIFORM = 'uniform'
DIRICHLET = 'dirichlet'

-def __init__(self, config, shapes, conf=None, initial_counts=1.):
+def __init__(self, config, shapes: ModelShapes, initial_counts=1.):

self.config = config
self.shapes = shapes
-self.conf = conf

self.initial_counts = initial_counts
self.prior_type = None
@@ -373,9 +371,12 @@ def invalid_prior_message(self, s):

class ConfoundingEffectsPrior(DirichletPrior):

+conf: str

def __init__(self, config, shapes, conf, initial_counts=1.):
-super(ConfoundingEffectsPrior, self).__init__(config, shapes, conf=conf,
+super(ConfoundingEffectsPrior, self).__init__(config, shapes,
initial_counts=initial_counts)
+self.conf = conf

def parse_attributes(self):
n_groups = len(self.config)
@@ -704,7 +705,7 @@ def get_setup_message(self):

def compute_gaussian_geo_prior(
cluster: np.array,
-network: 'ComputeNetwork',
+network: ComputeNetwork,
cov: np.array,
) -> float:
"""This function computes the 2D Gaussian geo-prior for all edges in the cluster.
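In both model files above, the `conf` parameter moves off the DirichletPrior base class and onto ConfoundingEffectsPrior, the only class that uses it. A simplified sketch of the resulting shape (details reduced for illustration):

class DirichletPrior:
    def __init__(self, config, shapes, initial_counts: float = 1.0):
        self.config = config
        self.shapes = shapes
        self.initial_counts = initial_counts

class ConfoundingEffectsPrior(DirichletPrior):
    conf: str  # declared here, where it is guaranteed to exist

    def __init__(self, config, shapes, conf: str, initial_counts: float = 1.0):
        super().__init__(config, shapes, initial_counts=initial_counts)
        self.conf = conf

This keeps the base-class type narrow: conf no longer needs a None default that most subclasses ignore.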
56 changes: 0 additions & 56 deletions sbayes/preprocessing.py
@@ -15,11 +15,8 @@
from numpy.typing import NDArray
import pyproj

-from sbayes.model import normalize_weights
from sbayes.util import compute_delaunay, read_costs_from_csv, PathLike

-EPS = np.finfo(float).eps


def load_canvas(config, logger=None):
""" This function reads sites from a csv, with the following columns:
@@ -210,59 +207,6 @@ def subset_features(features, subset):
return features[sub, :, :]


-def simulate_features(clusters, confounders, probabilities, weights):
-    """Simulate features from the likelihood.
-    Args:
-        clusters (np.array): Binary array indicating the assignment of sites to clusters.
-            shape: (n_clusters, n_sites)
-        confounders (dict): Includes binary arrays indicating the assignment of a site to a confounder
-        probabilities (dict): The probabilities of every state in each cluster and each group of a confounder
-        weights (np.array): The mixture coefficient controlling how much areal and confounding effects explain features
-            shape: (n_features, 1 + n_confounders)
-    Returns:
-        np.array: The sampled categories for all sites, features and states
-            shape: n_sites, n_features, n_states
-    """
-
-    n_clusters, n_sites = clusters.shape
-    _, n_features, n_states = probabilities['cluster_effect'].shape
-
-    # Are the weights fine?
-    assert np.allclose(a=np.sum(weights, axis=-1), b=1., rtol=EPS)
-
-    # Retrieve the assignment of sites to areal and confounding effects
-    # not all sites need to be assigned to one of the clusters or a confounder
-    assignment = [np.any(clusters, axis=0)]
-    o = 0
-    assignment_order = {"cluster_effect": o}
-
-    for k, v in confounders.items():
-        o += 1
-        assignment.append(np.any(v['membership'], axis=0))
-        assignment_order[k] = o
-
-    # Normalize the weights for each site depending on whether clusters or confounder are relevant for that site
-    normed_weights = normalize_weights(weights, np.array(assignment).T)
-    normed_weights = np.transpose(normed_weights, (1, 0, 2))
-
-    features = np.zeros((n_sites, n_features), dtype=int)
-
-    for feat in range(n_features):
-
-        # Compute the feature likelihood matrix (for all sites and all states)
-        lh_cluster_effect = clusters.T.dot(probabilities['cluster_effect'][:, feat, :]).T
-        lh_feature = normed_weights[feat, :, assignment_order['cluster_effect']] * lh_cluster_effect
-
-        for k, v in confounders.items():
-            lh_confounder = v['membership'].T.dot(probabilities[k][:, feat, :]).T
-            lh_feature += normed_weights[feat, :, assignment_order[k]] * lh_confounder
-
-        # Sample from the categorical distribution defined by lh_feature
-        features[:, feat] = sample_categorical(lh_feature.T)
-
-    return features


EYES = {}


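The deleted simulate_features drew each feature from a categorical distribution via sample_categorical. For reference, a minimal self-contained sketch of that operation; this is a hypothetical re-implementation, not the sBayes helper:

import numpy as np

def sample_categorical_rows(p: np.ndarray, rng=None) -> np.ndarray:
    """Draw one category index per row of p (rows must sum to 1)."""
    rng = np.random.default_rng() if rng is None else rng
    # Inverse-CDF sampling: compare one uniform draw per row against
    # the row-wise cumulative probabilities.
    cdf = np.cumsum(p, axis=-1)
    u = rng.random(p.shape[0])[:, None]
    return np.argmax(u < cdf, axis=-1)

probs = np.array([[0.2, 0.5, 0.3],
                  [0.9, 0.05, 0.05]])
print(sample_categorical_rows(probs))  # e.g. [1 0]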
17 changes: 7 additions & 10 deletions sbayes/sampling/sbayes_sampling.py
@@ -1,15 +1,13 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import logging
import random as _random

import numpy as np
import scipy.stats as stats

from sbayes.sampling.mcmc import MCMC
-from sbayes.model import normalize_weights, ConfoundingEffectsPrior
-from sbayes.sampling.state import Sample, Clusters, ArrayParameter, GroupedParameters
+from sbayes.model import normalize_weights
+from sbayes.sampling.state import Sample
from sbayes.sampling.operators import (
AlterWeights,
AlterClusterEffect,
@@ -21,8 +19,7 @@
GibbsSampleClusterEffect,
GibbsSampleConfoundingEffects,
)
-from sbayes.util import get_neighbours, normalize, dirichlet_logpdf, get_max_size_list
-from sbayes.preprocessing import sample_categorical
+from sbayes.util import get_neighbours, normalize, get_max_size_list
from sbayes.config.config import OperatorsConfig


@@ -102,7 +99,7 @@ def calculate_current_source_prob(self, sample: Sample, site_subset=None):
lh_per_component = likelihood.update_component_likelihoods(sample=sample)
weights = likelihood.update_weights(sample=sample)
source_posterior = normalize(lh_per_component[site_subset] * weights[site_subset], axis=-1)
-is_source = np.where(sample.source.ravel())
+is_source = np.where(sample.source.value.ravel())
return np.sum(np.log(source_posterior.ravel()[is_source]))

# todo: fix
@@ -111,7 +108,7 @@ def gibbsish_sample_clusters(self, sample: Sample, resample_source=True, **kwarg

sample_new = sample.copy()
likelihood = self.posterior_per_chain[sample.chain].likelihood
-occupied = np.any(sample.clusters, axis=0)
+occupied = np.any(sample.clusters.value, axis=0)

# Randomly choose one of the clusters to modify
i_cluster = np.random.choice(range(sample.clusters.shape[0]))
@@ -178,7 +175,7 @@ def random_subset(n, k):
marginal_lh_without_z = np.prod(feature_lh_without_z, axis=-1)

posterior_cluster = marginal_lh_with_z / (marginal_lh_with_z + marginal_lh_without_z)
-new_cluster = sample.clusters[i_cluster].copy()
+new_cluster = sample.clusters.value[i_cluster].copy()
new_cluster[available] = (np.random.random(n_available) < posterior_cluster)
sample_new.update_cluster(i_cluster, new_cluster)

@@ -527,7 +524,7 @@ def get_operators(self, operators_config: OperatorsConfig):
for k in self.model.confounders:
op_name = f"gibbs_sample_confounding_effects_{k}"
operators[op_name] = GibbsSampleConfoundingEffects(
-weight=operators_config.confounding_effects,
+weight=r * operators_config.confounding_effects,
confounder=k,
source_index=self.source_index['confounding_effects'][k],
model_by_chain=self.posterior_per_chain,
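Several fixes in this file access sampler state through a .value attribute (sample.source.value, sample.clusters.value). A small sketch of the wrapper pattern these fixes imply; the classes below are assumptions for illustration, not the actual sbayes.sampling.state API:

import numpy as np

class ArrayParameter:
    """Hypothetical stand-in for the wrapped parameters held by Sample."""
    def __init__(self, value: np.ndarray):
        self._value = value

    @property
    def value(self) -> np.ndarray:
        # Reads go through .value; updates would go through dedicated setters.
        return self._value

class Sample:
    def __init__(self, clusters: np.ndarray):
        self.clusters = ArrayParameter(clusters)

sample = Sample(np.array([[True, False], [False, True]]))
occupied = np.any(sample.clusters.value, axis=0)  # not np.any(sample.clusters, ...)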
