Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Features/607 som #760

Draft
wants to merge 26 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0b4c4c1
Initial Commit
Inzlinger Jun 29, 2020
543ef69
WIP
Inzlinger Jul 6, 2020
4ab210d
WIP
Inzlinger Jul 8, 2020
bbc054e
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Jul 8, 2020
42520de
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Jul 17, 2020
09e85ff
WIP
Inzlinger Jul 17, 2020
8119054
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Jul 23, 2020
85b8747
Implementation som
Inzlinger Jul 23, 2020
a9b1fc7
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Jul 31, 2020
d04d877
SOM with batches, bugfix operations
Inzlinger Aug 17, 2020
fba5c36
Bugfix in SOM
Inzlinger Aug 19, 2020
cd4c97b
SOM more memory efficient
Inzlinger Aug 19, 2020
2a0a196
Merge branch 'master' of https://github.com/helmholtz-analytics/heat …
Aug 20, 2020
2da3e17
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Aug 20, 2020
12a43d1
SOM fit_batch with smaller batches
Inzlinger Aug 21, 2020
4a8315b
Merge branch 'features/607-som' of https://github.com/helmholtz-analy…
Aug 21, 2020
ee8c89f
SOM fit_batch with smaller batches
Inzlinger Aug 21, 2020
ff2e798
Merge branch 'features/607-som' of github.com:helmholtz-analytics/hea…
Inzlinger Aug 21, 2020
79425e7
Merge branch 'features/607-som' of https://github.com/helmholtz-analy…
Aug 21, 2020
7beb9c4
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Nov 16, 2020
3bfb7fc
Cleanup
Inzlinger Apr 17, 2021
4f15226
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Apr 17, 2021
0a1e9e2
Adjusted parameters of examples
Inzlinger Apr 17, 2021
089f9c4
precompute batching
Inzlinger Apr 17, 2021
4421f3d
Merge branch 'master' of github.com:helmholtz-analytics/heat into fea…
Inzlinger Nov 11, 2021
69926fb
Doc
Inzlinger Nov 11, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions examples/classification/demo_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def create_fold(dataset_x, dataset_y, size, seed=None):
random.seed(seed)
indices = [i for i in range(data_length)]
random.shuffle(indices)

data_indices = ht.array(indices[0:size], split=0)
verification_indices = ht.array(indices[size:], split=0)

Expand Down Expand Up @@ -142,4 +141,4 @@ def verify_algorithm(x, y, split_number, split_size, k, seed=None):
return accuracies


print(verify_algorithm(X, Y, 1, 30, 5, 1))
print(verify_algorithm(X, Y, 10, 30, 5))
40 changes: 40 additions & 0 deletions examples/som/demo_fixedsom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import sys
import os
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Fix python Path if run from terminal
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath(os.path.join(curdir, "../../")))

import heat as ht
from heat.som.som import FixedSOM

# Load the iris dataset from its HDF5 file, split along axis 0.
# The path is anchored at this script's directory (``curdir``) instead of the
# current working directory, so the demo can be started from anywhere.
X = ht.load_hdf5(
    os.path.abspath(os.path.join(curdir, "../../heat/datasets/iris.h5")),
    dataset="data",
    split=0,
)

# Generate keys for the iris.h5 dataset: three consecutive runs of 50 samples,
# labelled 0, 1 and 2 in that order.
keys = [label for label in range(3) for _ in range(50)]
Y = ht.array(keys, split=0)

# NOTE(review): the three positional arguments are presumably the map height,
# map width and the input dimensionality (iris has 4 features) -- confirm
# against the FixedSOM signature.
som = FixedSOM(
    10,
    10,
    4,
    initial_learning_rate=0.1,
    target_learning_rate=0.01,
    initial_radius=6,
    target_radius=2,
    max_epoch=400,
    batch_size=75,
    seed=1,
)
som.fit(X)

print(som.umatrix())
107 changes: 107 additions & 0 deletions examples/som/demo_knn_som.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import sys
import os
import random

# Fix python Path if run from terminal
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath(os.path.join(curdir, "../../")))

import heat as ht
from heat.som.som import FixedSOM
from heat.classification.knn import KNN

# Load the iris dataset, split along axis 0. The path is resolved relative to
# this script's directory (``curdir``) rather than the current working
# directory, so the demo does not depend on where it is launched from.
X = ht.load_hdf5(
    os.path.abspath(os.path.join(curdir, "../../heat/datasets/iris.h5")),
    dataset="data",
    split=0,
)

# Labels for iris.h5: three consecutive runs of 50 samples, labelled 0, 1, 2.
keys = [label for label in range(3) for _ in range(50)]
Y = ht.array(keys, split=0)


def create_fold(dataset_x, dataset_y, size, seed=None):
    """
    Randomly splits the dataset into two parts for cross-validation.

    Parameters
    ----------
    dataset_x : ht.DNDarray
        data vectors, required
    dataset_y : ht.DNDarray
        labels for dataset_x, required; must have the same length as dataset_x
    size : int
        the size of the split to create; must be smaller than len(dataset_x)
    seed : int, optional
        seed for Python's global random generator, allows deterministic testing.
        Every integer (including 0) is applied; with None the generator is not
        re-seeded.

    Returns
    -------
    fold_x : ht.DNDarray
        the ``size`` randomly selected entries of dataset_x
    fold_y : ht.DNDarray
        the labels from dataset_y belonging to fold_x
    verification_x : ht.DNDarray
        the remaining ``len(dataset_x) - size`` entries of dataset_x not in fold_x
    verification_y : ht.DNDarray
        the remaining ``len(dataset_y) - size`` labels of dataset_y not in fold_y

    Raises
    ------
    AssertionError
        if the lengths of dataset_x and dataset_y differ or size is not smaller
        than the dataset length
    """
    assert len(dataset_y) == len(dataset_x)
    assert size < len(dataset_x)

    data_length = len(dataset_x)

    # Compare against None explicitly: a plain truthiness check would silently
    # ignore a seed of 0 and make that run non-deterministic.
    if seed is not None:
        random.seed(seed)
    indices = list(range(data_length))
    random.shuffle(indices)
    # The first `size` shuffled indices form the fold, the rest the verification set.
    data_indices = ht.array(indices[0:size], split=0)
    verification_indices = ht.array(indices[size:], split=0)

    fold_x = ht.array(dataset_x[data_indices], is_split=0)
    fold_y = ht.array(dataset_y[data_indices], is_split=0)
    verification_y = ht.array(dataset_y[verification_indices], is_split=0)
    verification_x = ht.array(dataset_x[verification_indices], is_split=0)

    # Balance arrays
    fold_x.balance_()
    fold_y.balance_()
    verification_y.balance_()
    verification_x.balance_()

    return fold_x, fold_y, verification_x, verification_y


def test_net(som, x, y, split_number, split_size, seed=None, k=5):
    """
    Evaluates a KNN classifier on data transformed by ``som.predict``.

    For every split a fold is drawn with ``create_fold``, both parts are passed
    through ``som.predict``, a KNN is built on the fold and the fraction of
    correctly predicted verification labels is recorded.

    Parameters
    ----------
    som : object
        fitted map providing a ``predict`` method
    x : ht.DNDarray
        data vectors
    y : ht.DNDarray
        labels for x
    split_number : int
        number of folds to evaluate
    split_size : int
        number of samples in each fold, forwarded to ``create_fold``
    seed : int, optional
        forwarded unchanged to every ``create_fold`` call. Because the same value
        is reused, a seed that ``create_fold`` applies reproduces the same fold
        in every split.
    k : int, optional
        number of neighbours handed to ``KNN``; defaults to 5

    Returns
    -------
    accuracies : list of float
        one accuracy value per split
    """
    accuracies = []
    for _ in range(split_number):
        fold_x, fold_y, verification_x, verification_y = create_fold(x, y, split_size, seed)

        # Classify in the representation produced by the map, not on the raw data.
        new_x = som.predict(fold_x)
        verification_x = som.predict(verification_x)
        knn = KNN(new_x, fold_y, k)
        result = knn.predict(verification_x)
        # Accuracy = matching predictions / number of verification samples.
        accuracies.append(
            (ht.sum(ht.where(result == verification_y, 1, 0)) / verification_y.shape[0]).item()
        )
    return accuracies


# Training options for the demo map, collected in one place.
som_options = dict(
    initial_learning_rate=0.1,
    target_learning_rate=0.01,
    initial_radius=8,
    target_radius=2,
    max_epoch=100,
    batch_size=150,
    seed=1,
)
# NOTE(review): positional arguments are presumably map height, map width and
# input dimensionality -- confirm against the FixedSOM signature.
som = FixedSOM(10, 10, 4, **som_options)

som.fit(X)
# print(som.umatrix())
# NOTE(review): the trailing 0 is a second argument to print (it is printed
# after the accuracy list), not an argument of test_net -- confirm the intent.
print(test_net(som, X, Y, 10, 30), 0)
13 changes: 13 additions & 0 deletions heat/core/_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from . import statistics
from .dndarray import DNDarray
from . import types
from .manipulations import resplit

from typing import Callable, Optional, Type, Union, Dict

Expand Down Expand Up @@ -142,6 +143,18 @@ def __binary_op(
newcomm.Bcast(t2)
newcomm.Free()

if (
len(t2.shape) < len(output_shape)
or t2.shape[t2.split] != output_shape[t2.split]
):
if t2.shape[t2.split] > 1 and t2.comm.is_distributed():
t2 = resplit(t2)
elif (
len(t1.shape) < len(output_shape)
or t1.shape[t1.split] != output_shape[t1.split]
):
if t1.shape[t1.split] > 1 and t1.comm.is_distributed():
t1 = resplit(t1)
else:
raise TypeError(
"Only tensors and numeric scalars are supported, but input was {}".format(type(t2))
Expand Down
5 changes: 5 additions & 0 deletions heat/som/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
This is the heat.som submodule.

It contains modules for creating and using self-organizing maps.
"""
Loading