mega simple commit so far!

hearues-zueke-github · Feb 11, 2021 · ea65143 · ea65143
1 parent d630c9d
commit ea65143
Show file tree

Hide file tree

Showing 11 changed files with 587 additions and 51 deletions.
diff --git a/clustering/try_k_mean_algorithm.py b/clustering/try_k_mean_algorithm.py
@@ -28,8 +28,6 @@
 
 import itertools
 
-import matplotlib.pyplot as plt
-
 import multiprocessing as mp
 
 PATH_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)).replace("\\", "/")+"/"
@@ -47,30 +45,49 @@
 
 import utils_cluster
 
-if __name__ == '__main__':
-    l_n = [30, 100, 84]
+def main():
+    """Cluster three random 2-D Gaussian point clouds and report the result.
+
+    Draws the sample points, runs `utils_cluster.calculate_clusters` on them,
+    shows the figures produced by `utils_cluster.get_plots` and prints the
+    mean of the per-point values returned by
+    `utils_cluster.do_clustering_silhouette` for every cluster.
+
+    Returns:
+        A DotMap built from all local variables of this function, so the
+        local names below are part of what callers can read.
+    """
+    l_n = [380, 180, 284]
 
     l_mean = [(1, 3), (4, 8), (-1, 5)]
     l_std = [(1., 2.), (1., 0.7), (0.5, 1.25)]
 
-    l_v = [np.random.normal(mean, std, (n, 2)) for n, mean, std in zip(l_n, l_mean, l_std)]
+    # np.longdouble is the portable spelling of the extended float type:
+    # np.float128 is only defined on platforms whose C long double is 128 bit.
+    l_v = [np.random.normal(mean, std, (n, 2)).astype(np.longdouble) for n, mean, std in zip(l_n, l_mean, l_std)]
 
     points = np.vstack(l_v)
 
+    cluster_amount = 3
+    iterations = 100
+
+    # get_plots picks one colour per cluster from utils_cluster.l_color.
+    assert len(utils_cluster.l_color) >= cluster_amount
     # sys.exit()
 
-    cluster_points, arr_error = utils_cluster.calculate_clusters(points, 4, 100)
+    cluster_points, l_cluster_points_correspond, arr_error, l_error_cluster, l_cluster = utils_cluster.calculate_clusters(
+        points=points,
+        cluster_amount=cluster_amount,
+        iterations=iterations,
+    )
 
-    xs, ys = points.T
-    xs_c, ys_c = cluster_points.T
+    utils_cluster.get_plots(
+        cluster_points=cluster_points,
+        l_cluster_points_correspond=l_cluster_points_correspond,
+        arr_error=arr_error,
+        l_error_cluster=l_error_cluster,
+    )
 
-    plt.figure()
+    # xs, ys = points.T
 
-    plt.plot(xs, ys, color='#0000FF', marker='.', ms=2., ls='')
-    plt.plot(xs_c, ys_c, color='#00FF00', marker='.', ms=8., ls='')
+    dm = utils_cluster.do_clustering_silhouette(points, l_cluster, cluster_amount)
+    l_cluster_val_s = dm.l_cluster_val_s
 
-    plt.figure()
+    # l_arr_val_s = np.array([(np.min(arr_val_s), np.median(arr_val_s), np.max(arr_val_s)) for arr_val_s in l_cluster_val_s])
+    # pprint(l_arr_val_s)
 
-    plt.plot(np.arange(0, arr_error.shape[0]), arr_error, color='#00FF00', marker='.', ms=8., ls='-')
+    l_mean_val_s = [np.mean(l) for l in l_cluster_val_s]
+    print('l_mean_val_s:')
+    pprint(l_mean_val_s)
 
-    plt.show()
+    return DotMap(locals(), _dynamic=None)
+
+
+if __name__ == '__main__':
+    dm = main()
diff --git a/clustering/utils_cluster.py b/clustering/utils_cluster.py
@@ -1,37 +1,166 @@
 from typing import List, Dict, Set, Mapping, Any, Tuple
 
+import matplotlib.pyplot as plt
+
+from dotmap import DotMap
 import numpy as np
 
+__version__ = '0.1.0'
 dm_obj_file_name = 'dm_obj.pkl.gz'
 
+# l_color = [
+#     '#00F020',
+#     '#008000',
+#     '#FF0000',
+#     '#0000FF',
+# ]
+
+# Colour palette indexed by cluster number in get_plots: every combination of
+# the five intensity levels below for the red, green and blue channel, i.e.
+# 5**3 = 125 hex colour strings running from '#000000' to '#FFFFFF'.
+l_hex_str = ['00', '40', '80', 'C0', 'FF']
+l_color = ['#{}{}{}'.format(col_r, col_g, col_b) for col_r in l_hex_str for col_g in l_hex_str for col_b in l_hex_str]
 
-def calculate_clusters(points : np.ndarray, cluster_amount : int, iterations : int) -> Tuple[np.ndarray, np.ndarray]:
+def calculate_clusters(points : np.ndarray, cluster_amount : int, iterations : int) \
+-> Tuple[np.ndarray, List[np.ndarray], np.ndarray, List[List[np.longdouble]], np.ndarray]:
+    """Group `points` around `cluster_amount` centres by repeated mean updates.
+
+    The start centres are `cluster_amount` distinct rows of `points` picked at
+    random. Every round assigns each point to its nearest centre (Euclidean
+    distance), records the mean distance inside every cluster and moves each
+    centre to the mean of its points. At most `iterations + 1` rounds are
+    run; the loop ends early once a round leaves all centres unchanged.
+
+    Args:
+        points: 2-D array with one point per row.
+        cluster_amount: number of centres; must not exceed the point count.
+        iterations: upper bound for the number of update rounds (plus one).
+
+    Returns:
+        A tuple `(cluster_points, l_cluster_points_correspond, arr_error,
+        l_error_cluster, arr_argmin)`: the final centres, the points of each
+        cluster, the summed per-cluster mean distance of every round, the
+        per-cluster mean distance history, and the final cluster index of
+        every point.
+
+    Raises:
+        AssertionError: if a round leaves some cluster without any point
+            (only while assertions are enabled).
+    """
     point_dim = points.shape[1]
     # cluster_amount <= points.shape[0] !!!
     cluster_points = points[np.random.permutation(np.arange(0, len(points)))[:cluster_amount]].copy()
-    print("before cluster_points:\n{}".format(cluster_points))
+    # print("before cluster_points:\n{}".format(cluster_points))
 
     # calc new clusters!
-    l_error : List[float] = []
+    l_error : List[np.longdouble] = []
+    l_error_cluster : List[List[np.longdouble]] = [[] for _ in range(0, cluster_amount)]
 
+    cluster_points_prev : np.ndarray = cluster_points.copy()
     i_nr : int
     for i_nr in range(0, iterations + 1):
         arr_sums_diff = np.sqrt(np.sum((points.reshape((-1, 1, point_dim)) - cluster_points.reshape((1, -1, point_dim)))**2, axis=2))
 
         arr_argmin = np.argmin(arr_sums_diff, axis=1)
-
-        # error = np.sum(arr_sums_diff)
-        error = np.sum(arr_sums_diff[:, arr_argmin])
-        l_error.append(error)
-        print("i_nr: {}, error: {}".format(i_nr, error))
-
+
+        # Every cluster is expected to own at least one point. With
+        # assertions disabled (-O) the empty case is handled in the loop below.
+        _, c = np.unique(arr_argmin, return_counts=True)
+        assert c.shape[0] == cluster_amount
+
+        # error = np.sum(arr_sums_diff)
+
+        # Remember the centres of this round for the convergence test.
+        cluster_points_prev[:] = cluster_points
+
+        l_error_cluster_one = []
+
         i : int
         for i in range(0, cluster_amount):
-            arr = points[arr_argmin==i]
+            arr_idxs : np.ndarray = arr_argmin==i
+            # The mask always has one entry per point, so emptiness has to be
+            # tested on its content, not on its length.
+            if not arr_idxs.any():
+                # Keep the history aligned with the round counter and leave
+                # the centre where it is.
+                l_error_cluster[i].append(np.nan)
+                continue
+            arr = points[arr_idxs]
+            error_cluster = np.mean(arr_sums_diff[arr_idxs, i])
+            l_error_cluster_one.append(error_cluster)
+            l_error_cluster[i].append(error_cluster)
             cluster_points[i] = np.mean(arr, axis=0)
-
+
+        error = np.sum(l_error_cluster_one)
+        print("i_nr: {}, error: {}".format(i_nr, error))
+        l_error.append(error)
+
+        # Converged: no centre moved during this round.
+        if np.all(np.equal(cluster_points, cluster_points_prev)):
+            break
+
         # print("- after cluster_points:\n{}".format(cluster_points))
 
     arr_error = np.array(l_error)
-
-    return cluster_points, arr_error
+
+    # Final assignment against the centres produced by the last update.
+    arr_sums_diff = np.sqrt(np.sum((points.reshape((-1, 1, point_dim)) - cluster_points.reshape((1, -1, point_dim))) ** 2, axis=2))
+    arr_argmin = np.argmin(arr_sums_diff, axis=1)
+    l_cluster_points_correspond = []
+    for i in range(0, cluster_amount):
+        l_cluster_points_correspond.append(points[arr_argmin==i])
+
+    return cluster_points, l_cluster_points_correspond, arr_error, l_error_cluster, arr_argmin
+
+
+def get_plots(cluster_points, l_cluster_points_correspond, arr_error, l_error_cluster):
+    """Show the clustering result and its error history as matplotlib figures.
+
+    Closes every open figure first. When the centres have exactly two
+    columns, a scatter plot of each cluster's points (one colour from
+    `l_color` per cluster) plus the centres is drawn. After that one figure
+    shows `arr_error` and another shows one curve per cluster taken from
+    `l_error_cluster`. The call ends with `plt.show()`.
+    """
+    n_clusters = cluster_points.shape[0]
+    plt.close('all')
+
+    # Figure 1 (two-column centres only): members of every cluster and the centres.
+    if cluster_points.shape[1] == 2:
+        plt.figure()
+
+        for colour, members in zip(l_color, l_cluster_points_correspond):
+            member_xs, member_ys = members.T
+            plt.plot(member_xs, member_ys, color=colour, marker='.', ms=2., ls='')
+
+        centre_xs, centre_ys = cluster_points.T
+        plt.plot(centre_xs, centre_ys, color='#00FF00', marker='.', ms=8., ls='', mec='#000000')
+
+        plt.title('Cluster scatter plot')
+        plt.tight_layout()
+
+    # Figure 2: total error per round.
+    plt.figure()
+    rounds = np.arange(0, arr_error.shape[0])
+    plt.plot(rounds, arr_error, color='#00FF00', marker='.', ms=8., ls='-')
+    plt.title('Error curve')
+    plt.tight_layout()
+
+    # Figure 3: one error curve per cluster, labelled p0, p1, ...
+    plt.figure()
+    curves = []
+    for idx in range(n_clusters):
+        line = plt.plot(rounds, l_error_cluster[idx], color=l_color[idx], marker='.', ms=8., ls='-')[0]
+        curves.append((line, f'p{idx}'))
+
+    # Split the (handle, label) pairs into the two sequences legend() takes.
+    plt.legend(*zip(*curves))
+    plt.title('Many error curves')
+    plt.tight_layout()
+
+    plt.show()
+
+
+def do_clustering_silhouette(points, l_cluster, cluster_amount):
+    """Compute the silhouette values a(i), b(i) and s(i) of every point.
+
+    For a point i, a(i) is its mean distance to the other points of its own
+    cluster, b(i) is the smallest mean distance to the points of any other
+    non-empty cluster, and s(i) = (b(i) - a(i)) / max(a(i), b(i)).
+    Points that are alone in their cluster keep the value 0 for all three,
+    as do b(i) and s(i) when no other non-empty cluster exists.
+
+    Args:
+        points: iterable of points (rows), in the same order as `l_cluster`.
+        l_cluster: cluster index of every point, each in
+            `range(cluster_amount)`.
+        cluster_amount: number of clusters.
+
+    Returns:
+        A DotMap built from all local variables of this function; the
+        results are `l_cluster_val_a`, `l_cluster_val_b` and
+        `l_cluster_val_s` (one array per cluster).
+    """
+    l_points_in_cluster = [[] for _ in range(0, cluster_amount)]
+    for point, c in zip(points, l_cluster):
+        l_points_in_cluster[c].append(point)
+
+    l_arr_points_in_cluster = [np.array(l) for l in l_points_in_cluster]
+
+    l_cluster_val_a = [np.zeros((arr.shape[0], )) for arr in l_arr_points_in_cluster]
+    l_cluster_val_b = [np.zeros((arr.shape[0], )) for arr in l_arr_points_in_cluster]
+    l_cluster_val_s = [np.zeros((arr.shape[0], )) for arr in l_arr_points_in_cluster]
+
+    for cluster_nr_i, (arr_a, arr_b, arr_s, arr_points_in_cluster_i) in enumerate(zip(
+        l_cluster_val_a, l_cluster_val_b, l_cluster_val_s, l_arr_points_in_cluster
+    ), 0):
+        rows = arr_points_in_cluster_i.shape[0]
+
+        # Nothing to fill for an empty cluster; a lone point keeps the
+        # zero-initialised values.
+        if rows <= 1:
+            continue
+
+        for i, p1 in enumerate(arr_points_in_cluster_i, 0):
+            # The distance of p1 to itself is 0, hence the division by rows - 1.
+            val_a = np.sum(np.sqrt(np.sum((arr_points_in_cluster_i - p1)**2, axis=1))) / (rows - 1)
+            arr_a[i] = val_a
+
+            l_b_vals = []
+            for cluster_nr_j, arr_points_in_cluster_j in enumerate(l_arr_points_in_cluster, 0):
+                if cluster_nr_j == cluster_nr_i or arr_points_in_cluster_j.shape[0] == 0:
+                    continue
+
+                # Mean over the points of cluster j, i.e. normalised by the
+                # size of that cluster and not by the size of p1's own one.
+                val_b_i = np.mean(np.sqrt(np.sum((arr_points_in_cluster_j - p1)**2, axis=1)))
+                l_b_vals.append(val_b_i)
+
+            # Without any other non-empty cluster b(i) is undefined; keep 0.
+            if not l_b_vals:
+                continue
+
+            val_b = np.min(l_b_vals)
+            arr_b[i] = val_b
+
+            # Same value as the two-branch form 1 - a/b resp. b/a - 1, but
+            # safe when both distances are 0 (coinciding points).
+            val_max = max(val_a, val_b)
+            arr_s[i] = (val_b - val_a) / val_max if val_max > 0 else 0
+
+    return DotMap(locals(), _dynamic=None)
diff --git a/cpp_programs/boost_programs/Makefile b/cpp_programs/boost_programs/Makefile
@@ -0,0 +1,2 @@
+# Compile example_boost_1.cpp and link it against libboost_program_options.
+# NOTE(review): g++ is invoked without -c, so the output file is a linked
+# executable even though its name ends in `.o`.
+default:
+	g++ -Wall example_boost_1.cpp -lboost_program_options -o example_boost_1_prog.o
diff --git a/cpp_programs/boost_programs/example_boost_1.cpp b/cpp_programs/boost_programs/example_boost_1.cpp
@@ -0,0 +1,40 @@
+#include <boost/program_options.hpp>
+#include <iostream>
+
+namespace opt = boost::program_options;
+
+// Adds up the values given for --apples and --oranges and prints the sum.
+// Returns 0 on success, 1 after printing the help text and 2 when the
+// command line cannot be parsed or one of the two options is missing.
+int main(int argc, char *argv[])
+{
+    // Constructing an options describing variable and giving it a
+    // textual description "All options".
+    opt::options_description desc("All options");
+
+    // When we are adding options, first parameter is a name
+    // to be used in command line. Second parameter is a type
+    // of that option, wrapped in value<> class. Third parameter
+    // must be a short description of that option.
+    desc.add_options()
+        ("apples", opt::value<int>(), "how many apples do you have")
+        ("oranges", opt::value<int>(), "how many oranges do you have")
+        ("help", "produce help message")
+    ;
+
+    // Variable to store our command line arguments.
+    opt::variables_map vm;
+
+    try {
+        // Parsing and storing arguments.
+        opt::store(opt::parse_command_line(argc, argv, desc), vm);
+
+        // Must be called after all the parsing and storing.
+        opt::notify(vm);
+    } catch (const opt::error &e) {
+        // Unknown options or malformed values end up here instead of
+        // terminating the program through an uncaught exception.
+        std::cerr << "Error: " << e.what() << "\n" << desc << "\n";
+        return 2;
+    }
+
+    if (vm.count("help")) {
+        std::cout << desc << "\n";
+        return 1;
+    }
+
+    // as<int>() throws on an option that was not given, so check first.
+    if (!vm.count("apples") || !vm.count("oranges")) {
+        std::cerr << "Both --apples and --oranges are required.\n" << desc << "\n";
+        return 2;
+    }
+
+    std::cout << "Fruits count: "
+        << vm["apples"].as<int>() + vm["oranges"].as<int>()
+        << std::endl;
+
+    return 0;
+} // end of `main`
diff --git a/cpp_programs/boost_programs/example_boost_float128.cpp b/cpp_programs/boost_programs/example_boost_float128.cpp
@@ -0,0 +1,40 @@
+#include <boost/program_options.hpp>
+#include <iostream>
+
+namespace opt = boost::program_options;
+
+// Adds up the values given for --apples and --oranges and prints the sum.
+// Returns 0 on success, 1 after printing the help text and 2 when the
+// command line cannot be parsed or one of the two options is missing.
+// NOTE(review): despite the file name nothing here uses a 128 bit float
+// type yet - confirm whether that part is still to be written.
+int main(int argc, char *argv[])
+{
+    // Constructing an options describing variable and giving it a
+    // textual description "All options".
+    opt::options_description desc("All options");
+
+    // When we are adding options, first parameter is a name
+    // to be used in command line. Second parameter is a type
+    // of that option, wrapped in value<> class. Third parameter
+    // must be a short description of that option.
+    desc.add_options()
+        ("apples", opt::value<int>(), "how many apples do you have")
+        ("oranges", opt::value<int>(), "how many oranges do you have")
+        ("help", "produce help message")
+    ;
+
+    // Variable to store our command line arguments.
+    opt::variables_map vm;
+
+    try {
+        // Parsing and storing arguments.
+        opt::store(opt::parse_command_line(argc, argv, desc), vm);
+
+        // Must be called after all the parsing and storing.
+        opt::notify(vm);
+    } catch (const opt::error &e) {
+        // Unknown options or malformed values end up here instead of
+        // terminating the program through an uncaught exception.
+        std::cerr << "Error: " << e.what() << "\n" << desc << "\n";
+        return 2;
+    }
+
+    if (vm.count("help")) {
+        std::cout << desc << "\n";
+        return 1;
+    }
+
+    // as<int>() throws on an option that was not given, so check first.
+    if (!vm.count("apples") || !vm.count("oranges")) {
+        std::cerr << "Both --apples and --oranges are required.\n" << desc << "\n";
+        return 2;
+    }
+
+    std::cout << "Fruits count: "
+        << vm["apples"].as<int>() + vm["oranges"].as<int>()
+        << std::endl;
+
+    return 0;
+} // end of `main`
diff --git a/game_of_life/execute_many_times.sh b/game_of_life/execute_many_times.sh
@@ -1,5 +1,5 @@
 #! /bin/bash
 
-for i in {1..20}; do
+# Run ./generic_automaton_fields.py five times, one run after the other.
+for i in {1..5}; do
     ./generic_automaton_fields.py
 done
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		default:
		g++ -Wall example_boost_1.cpp -lboost_program_options -o example_boost_1_prog.o