Skip to content

Commit

Permalink
- Updated LANDMark version
Browse files Browse the repository at this point in the history
- TreeOrdination can now take advantage of LANDMark's proximity measures (using either the terminal nodes or all nodes in the decision path as features)
- Data is cast to the np.float32 dtype in the CLRClosureTransformer
  • Loading branch information
jrudar committed Jul 12, 2023
1 parent 3b1b51f commit b27da44
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 18 deletions.
28 changes: 17 additions & 11 deletions TreeOrdination/TreeOrdination.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from sklearn.base import ClassifierMixin, BaseEstimator, clone
from sklearn.decomposition import PCA

from scipy.sparse import csr_array
from scipy.sparse import hstack as sp_hstack

from umap import UMAP

from LANDMark import LANDMarkClassifier
Expand Down Expand Up @@ -61,6 +64,7 @@ def __init__(
feature_names,
resampler=None,
metric="hamming",
prox_method = "terminal",
supervised_clf=ExtraTreesClassifier(1024),
proxy_model = ExtraTreesRegressor(1024),
landmark_model = LANDMarkClassifier(160, use_nnet=False, n_jobs = 8),
Expand All @@ -76,6 +80,7 @@ def __init__(
self.resampler = resampler

self.metric = metric
self.prox_method = prox_method

self.supervised_clf = supervised_clf
self.proxy_model = proxy_model
Expand Down Expand Up @@ -117,20 +122,22 @@ def get_initial_embedding(self, X):

# Get proximity
X_trf = resampler.transform(X)
self.LM_emb.append(model.proximity(X_trf))

# Update Overall Proximity
if i > 0:
self.LM_emb = sp_hstack((self.LM_emb, model.proximity(X_trf, self.prox_method)))
else:
self.LM_emb = model.proximity(X_trf, self.prox_method)

# Save the resampler
self.transformers.append(resampler)

# Get Overall Proximity
self.LM_emb = np.hstack(self.LM_emb)

# Get Embeddings
self.UMAP_trf = UMAP(
n_neighbors=self.n_neighbors,
n_components=15,
min_dist=self.min_dist,
metric=self.metric,
metric="hamming",
densmap=False,
).fit(self.LM_emb)

Expand Down Expand Up @@ -248,7 +255,7 @@ def plot_projection(self, X, y, ax_1=0, ax_2=1, use_approx=True, trf_type = "PCA

def predict_proba(self, X):

tree_emb = self.emb_transform(X, "LM")
tree_emb = self.emb_transform(X, "UMAP")

P = self.p_model.predict_proba(tree_emb)

Expand Down Expand Up @@ -281,11 +288,10 @@ def emb_transform(self, X, trf_type = "PCA"):
transformer = self.transformers[i]

# Get proximity
proximity = model.proximity(transformer.transform(X))

tree_emb.append(proximity)

tree_emb = np.hstack(tree_emb)
if i != 0:
tree_emb = sp_hstack((tree_emb, model.proximity(transformer.transform(X), self.prox_method)))
else:
tree_emb = model.proximity(transformer.transform(X), self.prox_method)

if trf_type == "LM":
return tree_emb
Expand Down
8 changes: 4 additions & 4 deletions TreeOrdination/transformers_treeord.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ def __init__(self, do_clr=False, delta=None):
def fit_transform(self, X, y=None, **kwargs):

if self.do_clr:
return clr(multiplicative_replacement(closure(X), delta=self.delta))
return clr(multiplicative_replacement(closure(X), delta=self.delta)).astype(np.float32)

else:
return closure(X)
return closure(X).astype(np.float32)

def transform(self, X, y=None, **kwargs):

if self.do_clr:
return clr(multiplicative_replacement(closure(X), delta=self.delta))
return clr(multiplicative_replacement(closure(X), delta=self.delta)).astype(np.float32)

else:
return closure(X)
return closure(X).astype(np.float32)


class ResampleRandomizeTransform(BaseEstimator, TransformerMixin):
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ dependencies:
- python >=3.8
- pip
- pip:
- LANDMarkClassifier >= 2.0.0
- LANDMarkClassifier >= 2.1.0
- numpy == 1.23.5
- scikit-learn >= 1.1.2
- scikit-bio >= 0.5.8
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = ["hatchling"]

[project]
name = "TreeOrdination"
version = "1.3.2"
version = "1.3.4"
authors = [
{name = "Josip Rudar", email = "[email protected]"},
{name = "G. Brian Golding"},
Expand Down Expand Up @@ -40,7 +40,7 @@ dependencies = [
"umap-learn >= 0.5.3",
"seaborn",
"shap >= 0.40.0",
"LANDMarkClassifier >= 2.0.4"
"LANDMarkClassifier >= 2.1.0"
]

[tool.hatch.metadata]
Expand Down

0 comments on commit b27da44

Please sign in to comment.