Skip to content

Commit

Permalink
mega simple commit so far!
Browse files Browse the repository at this point in the history
  • Loading branch information
hearues-zueke-github committed Feb 11, 2021
1 parent d630c9d commit ea65143
Show file tree
Hide file tree
Showing 11 changed files with 587 additions and 51 deletions.
45 changes: 31 additions & 14 deletions clustering/try_k_mean_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@

import itertools

import matplotlib.pyplot as plt

import multiprocessing as mp

PATH_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)).replace("\\", "/")+"/"
Expand All @@ -47,30 +45,49 @@

import utils_cluster

if __name__ == '__main__':
l_n = [30, 100, 84]
def main():
l_n = [380, 180, 284]

l_mean = [(1, 3), (4, 8), (-1, 5)]
l_std = [(1., 2.), (1., 0.7), (0.5, 1.25)]

l_v = [np.random.normal(mean, std, (n, 2)) for n, mean, std in zip(l_n, l_mean, l_std)]
l_v = [np.random.normal(mean, std, (n, 2)).astype(np.float128) for n, mean, std in zip(l_n, l_mean, l_std)]

points = np.vstack(l_v)

cluster_amount = 3
iterations = 100

assert len(utils_cluster.l_color) >= cluster_amount
# sys.exit()

cluster_points, arr_error = utils_cluster.calculate_clusters(points, 4, 100)
cluster_points, l_cluster_points_correspond, arr_error, l_error_cluster, l_cluster = utils_cluster.calculate_clusters(
points=points,
cluster_amount=cluster_amount,
iterations=iterations,
)

xs, ys = points.T
xs_c, ys_c = cluster_points.T
utils_cluster.get_plots(
cluster_points=cluster_points,
l_cluster_points_correspond=l_cluster_points_correspond,
arr_error=arr_error,
l_error_cluster=l_error_cluster,
)

plt.figure()
# xs, ys = points.T

plt.plot(xs, ys, color='#0000FF', marker='.', ms=2., ls='')
plt.plot(xs_c, ys_c, color='#00FF00', marker='.', ms=8., ls='')
dm = utils_cluster.do_clustering_silhouette(points, l_cluster, cluster_amount)
l_cluster_val_s = dm.l_cluster_val_s

plt.figure()
# l_arr_val_s = np.array([(np.min(arr_val_s), np.median(arr_val_s), np.max(arr_val_s)) for arr_val_s in l_cluster_val_s])
# pprint(l_arr_val_s)

plt.plot(np.arange(0, arr_error.shape[0]), arr_error, color='#00FF00', marker='.', ms=8., ls='-')
l_mean_val_s = [np.mean(l) for l in l_cluster_val_s]
print('l_mean_val_s:')
pprint(l_mean_val_s)

plt.show()
return DotMap(locals(), _dynamic=None)


if __name__ == '__main__':
dm = main()
155 changes: 142 additions & 13 deletions clustering/utils_cluster.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,166 @@
from typing import List, Dict, Set, Mapping, Any, Tuple

import matplotlib.pyplot as plt

from dotmap import DotMap
import numpy as np

__version__ = '0.1.0'
dm_obj_file_name = 'dm_obj.pkl.gz'

# l_color = [
# '#00F020',
# '#008000',
# '#FF0000',
# '#0000FF',
# ]

l_hex_str = ['00', '40', '80', 'C0', 'FF']
l_color = ['#{}{}{}'.format(col_r, col_g, col_b) for col_r in l_hex_str for col_g in l_hex_str for col_b in l_hex_str]

def calculate_clusters(points : np.ndarray, cluster_amount : int, iterations : int) -> Tuple[np.ndarray, np.ndarray]:
def calculate_clusters(points : np.ndarray, cluster_amount : int, iterations : int) \
-> Tuple[np.ndarray, List[np.ndarray], np.ndarray, List[List[np.float128]], np.ndarray]:
point_dim = points.shape[1]
# cluster_amount <= points.shape[0] !!!
cluster_points = points[np.random.permutation(np.arange(0, len(points)))[:cluster_amount]].copy()
print("before cluster_points:\n{}".format(cluster_points))
# print("before cluster_points:\n{}".format(cluster_points))

# calc new clusters!
l_error : List[float] = []
l_error : List[np.float128] = []
l_error_cluster : List[List[np.float128]] = [[] for _ in range(0, cluster_amount)]

cluster_points_prev : np.ndarray = cluster_points.copy()
i_nr : int
for i_nr in range(0, iterations + 1):
arr_sums_diff = np.sqrt(np.sum((points.reshape((-1, 1, point_dim)) - cluster_points.reshape((1, -1, point_dim)))**2, axis=2))

arr_argmin = np.argmin(arr_sums_diff, axis=1)

# error = np.sum(arr_sums_diff)
error = np.sum(arr_sums_diff[:, arr_argmin])
l_error.append(error)
print("i_nr: {}, error: {}".format(i_nr, error))


u, c = np.unique(arr_argmin, return_counts=True)
assert c.shape[0] == cluster_amount

# error = np.sum(arr_sums_diff)

cluster_points_prev[:] = cluster_points

l_error_cluster_one = []

i : int
for i in range(0, cluster_amount):
arr = points[arr_argmin==i]
arr_idxs : np.ndarray = arr_argmin==i
if arr_idxs.shape[0] == 0:
continue
arr = points[arr_idxs]
error_cluster = np.mean(arr_sums_diff[arr_idxs, i])
l_error_cluster_one.append(error_cluster)
l_error_cluster[i].append(error_cluster)
cluster_points[i] = np.mean(arr, axis=0)


error = np.sum(l_error_cluster_one)
print("i_nr: {}, error: {}".format(i_nr, error))
l_error.append(error)

if np.all(np.equal(cluster_points, cluster_points_prev)):
break

# print("- after cluster_points:\n{}".format(cluster_points))

arr_error = np.array(l_error)

return cluster_points, arr_error

arr_sums_diff = np.sqrt(np.sum((points.reshape((-1, 1, point_dim)) - cluster_points.reshape((1, -1, point_dim))) ** 2, axis=2))
arr_argmin = np.argmin(arr_sums_diff, axis=1)
l_cluster_points_correspond = []
for i in range(0, cluster_amount):
l_cluster_points_correspond.append(points[arr_argmin==i])

return cluster_points, l_cluster_points_correspond, arr_error, l_error_cluster, arr_argmin


def get_plots(cluster_points, l_cluster_points_correspond, arr_error, l_error_cluster):
    """Draw the diagnostic figures of one clustering run and display them.

    All open figures are closed first.  Then up to three figures are built:

    1. A scatter plot of the members of each cluster (one colour of
       ``l_color`` per cluster) together with the cluster centres.  It is
       only drawn when the cluster centres have exactly two columns.
    2. The total error plotted against its index in ``arr_error``.
    3. One error curve per cluster, taken from ``l_error_cluster``, with a
       legend labelling the curves ``p0``, ``p1``, ...

    The call ends with ``plt.show()``.

    :param cluster_points: array of cluster centres, one row per cluster.
    :param l_cluster_points_correspond: per cluster, the array of points
        belonging to it.
    :param arr_error: array of total error values.
    :param l_error_cluster: per cluster, the sequence of its error values;
        each is plotted against ``range(len(arr_error))``.
    """
    n_clusters = cluster_points.shape[0]
    plt.close('all')

    # --- figure 1: scatter plot (two-dimensional data only) ---------------
    if cluster_points.shape[1] == 2:
        plt.figure()

        for member_color, members in zip(l_color, l_cluster_points_correspond):
            member_xs, member_ys = members.T
            plt.plot(member_xs, member_ys, color=member_color, marker='.', ms=2., ls='')

        centre_xs, centre_ys = cluster_points.T
        plt.plot(centre_xs, centre_ys, color='#00FF00', marker='.', ms=8., ls='', mec='#000000')

        plt.title('Cluster scatter plot')
        plt.tight_layout()

    # --- figure 2: total error curve --------------------------------------
    plt.figure()
    plt.plot(np.arange(0, arr_error.shape[0]), arr_error, color='#00FF00', marker='.', ms=8., ls='-')
    plt.title('Error curve')
    plt.tight_layout()

    # --- figure 3: one error curve per cluster ----------------------------
    plt.figure()
    iteration_axis = np.arange(0, arr_error.shape[0])
    legend_entries = []
    for cluster_idx in range(n_clusters):
        curve = plt.plot(
            iteration_axis,
            l_error_cluster[cluster_idx],
            color=l_color[cluster_idx],
            marker='.',
            ms=8.,
            ls='-',
        )[0]
        legend_entries.append((curve, 'p{}'.format(cluster_idx)))

    # Transpose [(handle, label), ...] into (handles, labels) for legend().
    plt.legend(*zip(*legend_entries))
    plt.title('Many error curves')
    plt.tight_layout()

    plt.show()


def do_clustering_silhouette(points, l_cluster, cluster_amount):
    """Compute the silhouette value of every point of a cluster assignment.

    For a point ``p`` that belongs to cluster ``i``:

    * ``a`` is the mean Euclidean distance from ``p`` to the *other* points
      of cluster ``i``,
    * ``b`` is the smallest mean Euclidean distance from ``p`` to the points
      of any other (non-empty) cluster,
    * ``s = (b - a) / max(a, b)``, and ``s = 0`` when both are zero.

    Points that are alone in their cluster get ``a = b = s = 0``.

    :param points: the points, iterated row by row in parallel with
        ``l_cluster``.
    :param l_cluster: for each point the index of its cluster; used to
        index a list of length ``cluster_amount``.
    :param cluster_amount: number of clusters.
    :return: ``DotMap`` built from this function's local variables.  Among
        others it contains ``l_cluster_val_a``, ``l_cluster_val_b`` and
        ``l_cluster_val_s``: one array per cluster holding the ``a``, ``b``
        and ``s`` values of the points of that cluster, in the order in
        which the points appear in ``points``.
    :raises ValueError: if a cluster with at least two points has no other
        non-empty cluster to compare against.
    """
    l_points_in_cluster = [[] for _ in range(0, cluster_amount)]
    for point, c in zip(points, l_cluster):
        l_points_in_cluster[c].append(point)

    l_arr_points_in_cluster = [np.array(l) for l in l_points_in_cluster]

    l_cluster_val_a = [np.zeros((arr.shape[0], )) for arr in l_arr_points_in_cluster]
    l_cluster_val_b = [np.zeros((arr.shape[0], )) for arr in l_arr_points_in_cluster]
    l_cluster_val_s = [np.zeros((arr.shape[0], )) for arr in l_arr_points_in_cluster]

    for cluster_nr_i, (arr_a, arr_b, arr_s, arr_points_in_cluster_i) in enumerate(zip(
        l_cluster_val_a, l_cluster_val_b, l_cluster_val_s, l_arr_points_in_cluster
    ), 0):
        rows = arr_points_in_cluster_i.shape[0]

        if rows == 1:
            # A singleton cluster has no intra-cluster distances; its
            # values stay at the zeros the arrays were initialised with.
            continue

        for i, p1 in enumerate(arr_points_in_cluster_i, 0):
            # The distance of p1 to itself is 0, so summing over the whole
            # cluster and dividing by (rows - 1) is the mean over the others.
            val_a = np.sum(np.sqrt(np.sum((arr_points_in_cluster_i - p1)**2, axis=1))) / (rows - 1)
            arr_a[i] = val_a

            l_b_vals = []
            for cluster_nr_j, arr_points_in_cluster_j in enumerate(l_arr_points_in_cluster, 0):
                rows_j = arr_points_in_cluster_j.shape[0]
                # Skip the own cluster and clusters without any points
                # (an empty array cannot be broadcast against p1).
                if cluster_nr_j == cluster_nr_i or rows_j == 0:
                    continue

                # Mean distance to cluster j: normalise by the size of
                # cluster j, not by the size of the point's own cluster.
                val_b_i = np.sum(np.sqrt(np.sum((arr_points_in_cluster_j - p1)**2, axis=1))) / rows_j
                l_b_vals.append(val_b_i)

            if not l_b_vals:
                raise ValueError(
                    'silhouette needs at least two non-empty clusters, '
                    'but cluster {} has no neighbouring cluster'.format(cluster_nr_i)
                )

            val_b = np.min(l_b_vals)
            arr_b[i] = val_b

            # (b - a) / max(a, b) equals 1 - a/b for a < b and b/a - 1
            # otherwise; the guard avoids 0/0 for coinciding points.
            val_max = max(val_a, val_b)
            arr_s[i] = (val_b - val_a) / val_max if val_max > 0 else 0

    return DotMap(locals(), _dynamic=None)
2 changes: 2 additions & 0 deletions cpp_programs/boost_programs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Default target: compiles example_boost_1.cpp with g++ (all warnings
# enabled via -Wall) and links it against boost_program_options.
# NOTE(review): no -c flag is passed, so the output file
# example_boost_1_prog.o is a linked executable despite its .o suffix.
default:
g++ -Wall example_boost_1.cpp -lboost_program_options -o example_boost_1_prog.o
40 changes: 40 additions & 0 deletions cpp_programs/boost_programs/example_boost_1.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <boost/program_options.hpp>
#include <iostream>

namespace opt = boost::program_options;

int main(int argc, char *argv[])
{
// Constructing an options describing variable and giving it a
// textual description "All options".
opt::options_description desc("All options");

// When we are adding options, first parameter is a name
// to be used in command line. Second parameter is a type
// of that option, wrapped in value<> class. Third parameter
// must be a short description of that option.
desc.add_options()
("apples", opt::value<int>(), "how many apples do you have")
("oranges", opt::value<int>(), "how many oranges do you have")
("help", "produce help message")
;

// Variable to store our command line arguments.
opt::variables_map vm;

// Parsing and storing arguments.
opt::store(opt::parse_command_line(argc, argv, desc), vm);

// Must be called after all the parsing and storing.
opt::notify(vm);

if (vm.count("help")) {
std::cout << desc << "\n";
return 1;
}

std::cout << "Fruits count: "
<< vm["apples"].as<int>() + vm["oranges"].as<int>()
<< std::endl;

} // end of `main`
40 changes: 40 additions & 0 deletions cpp_programs/boost_programs/example_boost_float128.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <boost/program_options.hpp>
#include <iostream>

namespace opt = boost::program_options;

int main(int argc, char *argv[])
{
// Constructing an options describing variable and giving it a
// textual description "All options".
opt::options_description desc("All options");

// When we are adding options, first parameter is a name
// to be used in command line. Second parameter is a type
// of that option, wrapped in value<> class. Third parameter
// must be a short description of that option.
desc.add_options()
("apples", opt::value<int>(), "how many apples do you have")
("oranges", opt::value<int>(), "how many oranges do you have")
("help", "produce help message")
;

// Variable to store our command line arguments.
opt::variables_map vm;

// Parsing and storing arguments.
opt::store(opt::parse_command_line(argc, argv, desc), vm);

// Must be called after all the parsing and storing.
opt::notify(vm);

if (vm.count("help")) {
std::cout << desc << "\n";
return 1;
}

std::cout << "Fruits count: "
<< vm["apples"].as<int>() + vm["oranges"].as<int>()
<< std::endl;

} // end of `main`
2 changes: 1 addition & 1 deletion game_of_life/execute_many_times.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#! /bin/bash

for i in {1..20}; do
for i in {1..5}; do
./generic_automaton_fields.py
done
Loading

0 comments on commit ea65143

Please sign in to comment.