From d7914ad9e19dcd8b729ffba8dc0cfe2dda336e53 Mon Sep 17 00:00:00 2001 From: Hearues Zueke Date: Mon, 26 Oct 2020 20:40:28 +0100 Subject: [PATCH] new old forgotten commits --- .gitignore | 2 + compress_enwiki8/.gitignore | 1 + compress_enwiki8/compress_enwik8_attempt_2.py | 192 ++++++++++ compress_enwiki8/compress_enwiki8.py | 102 ++++- compress_enwiki8/create_stats_enwik8.py | 69 ++++ global_object_getter_setter.py | 7 +- guis/tkinter_guis/.gitignore | 3 + pixel_images/config_file.py | 3 +- pixel_images/create_pixel_images.py | 351 +++++++++++++++++- .../check_different_map_datatypes.py | 84 ++++- vectorize_programs/gpu_example.py | 47 +++ 11 files changed, 818 insertions(+), 43 deletions(-) create mode 100644 compress_enwiki8/compress_enwik8_attempt_2.py create mode 100755 compress_enwiki8/create_stats_enwik8.py create mode 100755 vectorize_programs/gpu_example.py diff --git a/.gitignore b/.gitignore index d1c6815..06ea1d5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +nim + .idea *.pyc diff --git a/compress_enwiki8/.gitignore b/compress_enwiki8/.gitignore index b8adf34..10c7c4b 100644 --- a/compress_enwiki8/.gitignore +++ b/compress_enwiki8/.gitignore @@ -1,2 +1,3 @@ *.pkl.gz *.hex +data_enwik8 diff --git a/compress_enwiki8/compress_enwik8_attempt_2.py b/compress_enwiki8/compress_enwik8_attempt_2.py new file mode 100644 index 0000000..8e7dfc2 --- /dev/null +++ b/compress_enwiki8/compress_enwik8_attempt_2.py @@ -0,0 +1,192 @@ +#! /usr/bin/python3 + +# -*- coding: utf-8 -*- + +import dill +import gzip +import os +import sys + +# import tempfile +from memory_tempfile import MemoryTempfile +tempfile = MemoryTempfile() + +from collections import defaultdict +from copy import deepcopy +from dotmap import DotMap +from operator import itemgetter + +from os.path import expanduser + +import multiprocessing as mp + +PATH_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)).replace("\\", "/")+"/" +HOME_DIR = os.path.expanduser("~") +TEMP_DIR = tempfile.gettempdir()+"/" + +from PIL import Image + +import numpy as np + +sys.path.append("../") +from utils_serialization import get_pkl_gz_obj, save_pkl_gz_obj +import global_object_getter_setter + +import utils_compress_enwik8 + +from create_stats_enwik8 import calc_sorted_stats + +def calc_stats_using_bytes_tuple(arr, max_len): + used_len = 1000000 + max_amount_values = 50 + # max_len = 6 + l_stat = [] + s_chars = set() + for pos_i in range(0, arr.shape[0], used_len): + arr_1 = arr[pos_i:pos_i+used_len+max_len].reshape((-1, 1)) + print("pos_i: {:9}, {:9}".format(pos_i, pos_i+used_len)) + for _ in range(0, max_len-1): + arr_1 = np.hstack((arr_1[:-1], arr_1[1:, -1:])) + u, c = np.unique(arr_1.reshape((-1, )).view(','.join(['u1']*max_len)), return_counts=True) + if max_len == 1: + d = {tuple((t, )): j for t, j in zip(u, c)} + else: + d = {tuple(t): j for t, j in zip(u, c)} + + # get the max len for each seperate combined bytes! + l_t, l_j = list(zip(*list(d.items()))) + i_max = np.argmax(l_j) + + print("- max_len: {:2}, amount: {:10}, mult: {:10}, t: {}".format(max_len, l_j[i_max], max_len*l_j[i_max], l_t[i_max])) + print("-- len(d): {}".format(len(d))) + s_chars |= set(list(d.keys())) + + l = list(d.items()) + l_sort = sorted(list(d.items()), reverse=True, key=lambda x: (x[1], x[0])) + + l_stat.append('{:9},{:9}:{}'.format( + pos_i, + pos_i+used_len, + '|'.join(['{},{:5}'.format(''.join(map(lambda x: '{:02X}'.format(x), t)), c) for t, c in l_sort[:max_amount_values]]) + )) + l = sorted(s_chars) + print("l: {}".format(l)) + print("len(l): {}".format(len(l))) + + with open(TEMP_DIR+'enwik8_stats_max_len_{}.txt'.format(max_len), 'w') as f: + f.write('\n'.join(l_stat)+'\n') + + +if __name__ == "__main__": + file_object_name = 'global_compress_enwik8_attempt_2_object' + + if not global_object_getter_setter.do_object_exist(file_object_name): + arr = utils_compress_enwik8.get_arr(used_length=-1) + + + # calc_stats_using_bytes_tuple(arr, 1) + + # l_proc = [] + # cpu_count = mp.cpu_count() + # for i in range(2, cpu_count+2): + # l_proc.append(mp.Process(target=calc_stats_using_bytes_tuple, args=(arr, i))) + # for proc in l_proc: proc.start() + # for proc in l_proc: proc.join() + + d_all_part, l_sort = calc_sorted_stats() + d3 = d_all_part[3] + l_k_3 = list(d3.keys()) + l_k_i_2_byte = [(k, (0, i)) for i, k in enumerate(l_k_3, 0)] + + l_sort_ge_4_byte = sorted([(len(k)*v, -len(k), v, k) for k1 in range(4, 14) for k, v in d_all_part[k1].items()], reverse=True) + l_k_i_2_byte += [(k, (0, i)) for i, (_, _, _, k) in enumerate(l_sort_ge_4_byte[:256-len(l_k_3)], len(l_k_3))] + + l_k_i_3_byte = [(k, (0, i//256, i%256)) for i, (_, _, _, k) in enumerate(l_sort_ge_4_byte[256-len(l_k_3):], 0)] + + + d_obj = { + 'arr': arr, + 'd_all_part': d_all_part, + 'l_sort': l_sort, + 'd3': d3, + 'l_k_3': l_k_3, + 'l_k_i_2_byte': l_k_i_2_byte, + 'l_sort_ge_4_byte': l_sort_ge_4_byte, + 'l_k_i_2_byte': l_k_i_2_byte, + 'l_k_i_3_byte': l_k_i_3_byte, + # 'd_k_to_count': d_k_to_count, + # 'd_k_to_i_byte': d_k_to_i_byte, + # 'l_arr': l_arr, + } + print('Save global DATA!') + global_object_getter_setter.save_object(file_object_name, d_obj) + else: + print('Load global DATA!') + d_obj = global_object_getter_setter.load_object(file_object_name) + arr = d_obj['arr'] + d_all_part = d_obj['d_all_part'] + l_sort = d_obj['l_sort'] + d3 = d_obj['d3'] + l_k_3 = d_obj['l_k_3'] + l_k_i_2_byte = d_obj['l_k_i_2_byte'] + l_sort_ge_4_byte = d_obj['l_sort_ge_4_byte'] + l_k_i_2_byte = d_obj['l_k_i_2_byte'] + l_k_i_3_byte = d_obj['l_k_i_3_byte'] + # d_k_to_count = d_obj['d_k_to_count'] + # d_k_to_i_byte = d_obj['d_k_to_i_byte'] + # l_arr = d_obj['l_arr'] + + d_k_to_count = {k: v for k1 in range(3, 14) for k, v in d_all_part[k1].items()} + d_k_to_i_byte = dict(l_k_i_2_byte+l_k_i_3_byte) + + print("len(d_k_to_count): {}".format(len(d_k_to_count))) + print("len(d_k_to_i_byte): {}".format(len(d_k_to_i_byte))) + + assert set(list(d_k_to_count)) == set(list(d_k_to_i_byte)) + + l_arr = arr.tolist() + l_encrypt = [] + + max_len = 13 + length = len(l_arr) + i = 0 + while i < length: + l = [] + l_count = [] + l_mult = [] + + length_byte = 3 + j = i+3 + while j <= length and length_byte <= max_len: + t = tuple(l_arr[i:j]) + + if t in d_k_to_count: + l.append(t) + c = d_k_to_count[t] + l_count.append(c) + l_mult.append(len(t)*c) + + j += 1 + length_byte += 1 + + if len(l) == 0: + l_encrypt.append(l_arr[i]) + i += 1 + else: + i_max = len(l)-1 + # i_max = np.argmax(l_mult) + t_max = l[i_max] + l_byte = d_k_to_i_byte[t_max] + l_encrypt.extend(l_byte) + i += len(t_max) + + # print("l: {}".format(l)) + # print("l_count: {}".format(l_count)) + # print("l_mult: {}".format(l_mult)) + # break + + if i % 10000 == 0: + print("i: {}".format(i)) + + if i > 10000000: + break diff --git a/compress_enwiki8/compress_enwiki8.py b/compress_enwiki8/compress_enwiki8.py index 5b02a3d..2c1ba65 100755 --- a/compress_enwiki8/compress_enwiki8.py +++ b/compress_enwiki8/compress_enwiki8.py @@ -6,17 +6,23 @@ import gzip import os import sys -import tempfile + +# import tempfile +from memory_tempfile import MemoryTempfile +tempfile = MemoryTempfile() from collections import defaultdict from copy import deepcopy from dotmap import DotMap from operator import itemgetter -# from sortedcontainers import SortedSet from os.path import expanduser -PATH_HOME = expanduser("~")+'/' -print("PATH_HOME: {}".format(PATH_HOME)) + +import multiprocessing as mp + +PATH_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)).replace("\\", "/")+"/" +HOME_DIR = os.path.expanduser("~") +TEMP_DIR = tempfile.gettempdir()+"/" from PIL import Image @@ -27,10 +33,8 @@ import global_object_getter_setter import utils_compress_enwik8 -# utils_compress_enwik8.do_some_simple_tests() -# sys.exit() -PATH_ROOT_DIR = os.path.abspath(os.path.dirname(sys.argv[0]))+"/" +from create_stats_enwik8 import calc_sorted_stats def create_dict_word_count_for_arr(arr, max_byte_length=10): d_arr_comb = {} @@ -59,13 +63,84 @@ def create_dict_word_count_for_arr(arr, max_byte_length=10): if __name__ == "__main__": + arr = utils_compress_enwik8.get_arr(used_length=-1) + + def calc_stats_using_bytes_tuple(arr, max_len): + used_len = 1000000 + max_amount_values = 50 + # max_len = 6 + l_stat = [] + s_chars = set() + for pos_i in range(0, arr.shape[0], used_len): + arr_1 = arr[pos_i:pos_i+used_len+max_len].reshape((-1, 1)) + print("pos_i: {:9}, {:9}".format(pos_i, pos_i+used_len)) + for _ in range(0, max_len-1): + arr_1 = np.hstack((arr_1[:-1], arr_1[1:, -1:])) + u, c = np.unique(arr_1.reshape((-1, )).view(','.join(['u1']*max_len)), return_counts=True) + if max_len == 1: + d = {tuple((t, )): j for t, j in zip(u, c)} + else: + d = {tuple(t): j for t, j in zip(u, c)} - # arr = utils_compress_enwik8.get_arr(used_length=2**21) - arr = utils_compress_enwik8.get_arr(used_length=2**18) - # arr = utils_compress_enwik8.get_arr(used_length=2**23) - bytes_starting_size = arr.shape[0] - # arr = utils_compress_enwik8.get_arr(used_length=2**22+1) + # get the max len for each seperate combined bytes! + l_t, l_j = list(zip(*list(d.items()))) + i_max = np.argmax(l_j) + + print("- max_len: {:2}, amount: {:10}, mult: {:10}, t: {}".format(max_len, l_j[i_max], max_len*l_j[i_max], l_t[i_max])) + print("-- len(d): {}".format(len(d))) + s_chars |= set(list(d.keys())) + + l = list(d.items()) + l_sort = sorted(list(d.items()), reverse=True, key=lambda x: (x[1], x[0])) + + l_stat.append('{:9},{:9}:{}'.format( + pos_i, + pos_i+used_len, + '|'.join(['{},{:5}'.format(''.join(map(lambda x: '{:02X}'.format(x), t)), c) for t, c in l_sort[:max_amount_values]]) + )) + l = sorted(s_chars) + print("l: {}".format(l)) + print("len(l): {}".format(len(l))) + + with open(TEMP_DIR+'enwik8_stats_max_len_{}.txt'.format(max_len), 'w') as f: + f.write('\n'.join(l_stat)+'\n') + + # calc_stats_using_bytes_tuple(arr, 1) + + # l_proc = [] + # cpu_count = mp.cpu_count() + # for i in range(2, cpu_count+2): + # l_proc.append(mp.Process(target=calc_stats_using_bytes_tuple, args=(arr, i))) + # for proc in l_proc: proc.start() + # for proc in l_proc: proc.join() + + d_all_part, l_sort = calc_sorted_stats() + d3 = d_all_part[3] + l_k_3 = list(d3.keys()) + l_k_i_2_byte = [(k, (0, i)) for i, k in enumerate(l_k_3, 0)] + + l_sort_ge_4_byte = sorted([(len(k)*v, -len(k), v, k) for k1 in range(4, 14) for k, v in d_all_part[k1].items()], reverse=True) + l_k_i_2_byte += [(k, (0, i)) for i, (_, _, _, k) in enumerate(l_sort_ge_4_byte[:256-len(l_k_3)], len(l_k_3))] + + l_k_i_3_byte = [(k, (0, i//256, i%256)) for i, (_, _, _, k) in enumerate(l_sort_ge_4_byte[256-len(l_k_3):], 0)] + d_k_to_count = {k: v for k1 in range(3, 14) for k, v in d_all_part[k1].items()} + d_k_to_i_byte = dict(l_k_i_2_byte+l_k_i_3_byte) + + print("len(d_k_to_count): {}".format(len(d_k_to_count))) + print("len(d_k_to_i_byte): {}".format(len(d_k_to_i_byte))) + + assert set(list(d_k_to_count)) == set(list(d_k_to_i_byte)) + + l_encrypt = [] + + l_arr = arr.tolist() + + + sys.exit() + + + bytes_starting_size = arr.shape[0] # global_object_getter_setter.delete_object(OBJ_NAME_D_ARR_COMB) # global_object_getter_setter.delete_object(OBJ_NAME_D_ARR_COMB_UNIQUE) @@ -411,7 +486,6 @@ def create_dict_word_count_for_arr(arr, max_byte_length=10): # sys.exit() print() - print("bytes_starting_size: {}".format(bytes_starting_size)) print("LEN_BITS_CHOSEN_INDEX: {}".format(LEN_BITS_CHOSEN_INDEX)) print("LEN_CHOSEN_INDEX: {}".format(LEN_CHOSEN_INDEX)) print("MAX_BYTE_LENGTH: {}".format(MAX_BYTE_LENGTH)) @@ -440,6 +514,8 @@ def create_dict_word_count_for_arr(arr, max_byte_length=10): if not os.path.exists(tmp_hex_dir): os.makedirs(tmp_hex_dir) + print("bytes_starting_size: {}".format(bytes_starting_size)) + arr_compressed_full.tofile( (tmp_hex_dir+'content_compressed_size_orig_{size_orig}_size_comp_{size_comp}_round_nr_{round_nr}'+ '_max_idx_{LEN_CHOSEN_INDEX}_max_word_len_{MAX_BYTE_LENGTH}_max_first_pos_{MAX_FIRST_POS}.hzzv2.hex').format( diff --git a/compress_enwiki8/create_stats_enwik8.py b/compress_enwiki8/create_stats_enwik8.py new file mode 100755 index 0000000..1d98766 --- /dev/null +++ b/compress_enwiki8/create_stats_enwik8.py @@ -0,0 +1,69 @@ +#! /usr/bin/python3 + +# -*- coding: utf-8 -*- + +import dill +import gzip +import os +import sys + +# import tempfile +from memory_tempfile import MemoryTempfile +tempfile = MemoryTempfile() + +from collections import defaultdict +from copy import deepcopy +from dotmap import DotMap +from operator import itemgetter + +from os.path import expanduser + +PATH_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)).replace("\\", "/")+"/" +HOME_DIR = os.path.expanduser("~") +TEMP_DIR = tempfile.gettempdir()+"/" + +from PIL import Image + +import numpy as np + +sys.path.append("../") +from utils_serialization import get_pkl_gz_obj, save_pkl_gz_obj +import global_object_getter_setter + + +def calc_sorted_stats(): + file_path_template = PATH_ROOT_DIR+'data_enwik8/enwik8_stats_max_len_{}.txt' + # file_path_template = TEMP_DIR+'enwik8_stats_max_len_{}.txt' + + d_all = {} + d_all_part = {} + for i in range(2, 14): + print("i: {}".format(i)) + file_path = file_path_template.format(i) + with open(file_path, 'r') as f: + l_line = f.read().rstrip('\n').split('\n') + + l_line = [[t.split(',') for t in l.split(':')[1].split('|')] for l in l_line] + l_line = [[[tuple(int(t_str[i:i+2], 16) for i in range(0, len(t_str), 2)), int(c_str)] for t_str, c_str in l] for l in l_line] + + l_d = [{k: v for k, v in l} for l in l_line] + d = l_d[0] + for d_next in l_d[1:]: + for k, v in d_next.items(): + if k in d: + d[k] += v + else: + d[k] = v + + for k, v in d.items(): + assert k not in d_all + d_all[k] = v + d_all_part[i] = d + + l_sort = sorted([(len(k)*v, -len(k), v, k) for k, v in d_all.items() if len(k) > 2], reverse=True) + return d_all_part, l_sort + + +if __name__ == "__main__": + d_all_part, l_sort = calc_sorted_stats() + print('\n'.join([str(l) for l in l_sort[:30]])) diff --git a/global_object_getter_setter.py b/global_object_getter_setter.py index ee85248..6423cf2 100644 --- a/global_object_getter_setter.py +++ b/global_object_getter_setter.py @@ -2,10 +2,13 @@ import gzip import os import sys -import tempfile + +# import tempfile +from memory_tempfile import MemoryTempfile +tempfile = MemoryTempfile() TEMP_ROOT_DIR_PATH = tempfile.gettempdir() -TMP_PATH_DIR = os.path.join(TEMP_ROOT_DIR_PATH, 'python_objs/') +TEMP_FOLDER_PATH = os.path.join(TEMP_ROOT_DIR_PATH, 'python_objs/') if not os.path.exists(TEMP_FOLDER_PATH): os.makedirs(TEMP_FOLDER_PATH) diff --git a/guis/tkinter_guis/.gitignore b/guis/tkinter_guis/.gitignore index a6c57f5..48885e2 100644 --- a/guis/tkinter_guis/.gitignore +++ b/guis/tkinter_guis/.gitignore @@ -1 +1,4 @@ *.json +*.spec +build +dist diff --git a/pixel_images/config_file.py b/pixel_images/config_file.py index b9329fb..c9f78b2 100644 --- a/pixel_images/config_file.py +++ b/pixel_images/config_file.py @@ -1,3 +1,4 @@ # FILE_NAME_JPG = 'jim_gade/jim-gade-eYWNaMffWHI-unsplash.jpg' # FILE_NAME_JPG = 'mark_hang_fung/mark-hang-fung-so-kDY9Nwfcl2c-unsplash.jpg' -FILE_NAME_JPG = 'image_own/image_own_1.jpg' +# FILE_NAME_JPG = 'image_own/image_own_1.jpg' +FILE_NAME_JPG = 'danika_perkinson/danika-perkinson-QxHJ9lkXYNk-unsplash.png' \ No newline at end of file diff --git a/pixel_images/create_pixel_images.py b/pixel_images/create_pixel_images.py index 0a0d77f..a5daeac 100755 --- a/pixel_images/create_pixel_images.py +++ b/pixel_images/create_pixel_images.py @@ -5,8 +5,11 @@ import os import re import string +import sys import numpy as np +import scipy.stats as st +# import scipy as sp from PIL import Image, ImageDraw, ImageFont @@ -21,6 +24,196 @@ from config_file import FILE_NAME_JPG +def gkern(kernlen=21, nsig=3): + """Returns a 2D Gaussian kernel.""" + x = np.linspace(-nsig, nsig, kernlen+1) + kern1d = np.diff(st.norm.cdf(x)) + kern2d = np.outer(kern1d, kern1d) + return kern2d/kern2d.sum() +# sys.exit(0) + +""" +pix ... grayscale image! +boder_amount ... how many pixels the border should be copied outside! +""" +arr_direction_colors = np.array([ + [0x00, 0x00, 0x00], + [0xFF, 0x00, 0x00], + [0x00, 0xFF, 0x00], + [0x00, 0x00, 0xFF], + [0xFF, 0xFF, 0x00], + [0xFF, 0x00, 0xFF], + [0x00, 0xFF, 0xFF], + [0x80, 0x80, 0xFF], + [0x80, 0xFF, 0x80], +], dtype=np.uint8) +def create_sobel_image_derivative(pix, border_amount, dir_path): + pix_int = pix.astype(int) + pix_int_border_h = np.hstack((pix_int[:, :1], )*border_amount+(pix_int, )+(pix_int[:, -1:], )*border_amount) + pix_int_border = np.vstack((pix_int_border_h[:1, :], )*border_amount+(pix_int_border_h, )+(pix_int_border_h[-1:, :], )*border_amount) + + pix_float_border = pix_int_border.astype(np.float) / 255. + + print("pix.shape: {}".format(pix.shape)) + print("pix_int_border.shape: {}".format(pix_int_border.shape)) + + arr_weight_part = np.array([[1, 2, 1]]) + + for i in range(2, border_amount+1): + arr_col = np.zeros((arr_weight_part.shape[0], 1), dtype=arr_weight_part.dtype) + arr_weight_part_1 = np.hstack((arr_col, arr_weight_part, arr_col)) + arr_weight_part = np.vstack((arr_weight_part_1[:1, :]+1, arr_weight_part_1)) + + arr_weight = arr_weight_part.T + arr_weight_x = np.hstack((-np.flip(arr_weight, 1), np.zeros((arr_weight.shape[0], 1), dtype=arr_weight.dtype), arr_weight)) + arr_weight_y = np.flip(arr_weight_x.T, 0) + + print("arr_weight_x:\n{}".format(arr_weight_x)) + print("arr_weight_y:\n{}".format(arr_weight_y)) + + h, w = pix.shape + + pb_sobel_x_float = np.zeros(pix_int.shape, dtype=np.float) + # pb_sobel_x_int = pix_int.copy() + for j, arr_row in enumerate(arr_weight_x, 0): + for i, v in enumerate(arr_row, 0): + pb_sobel_x_float += v*pix_float_border[j:j+h, i:i+w] + # pb_sobel_x_int += v*pix_int_border[j:j+h, i:i+w] + + pb_sobel_y_float = np.zeros(pix_int.shape, dtype=np.float) + # pb_sobel_y_int = pix_int.copy() + for j, arr_row in enumerate(arr_weight_y, 0): + for i, v in enumerate(arr_row, 0): + pb_sobel_y_float += v*pix_float_border[j:j+h, i:i+w] + # pb_sobel_y_int += v*pix_int_border[j:j+h, i:i+w] + + pb_sobel_x_int = np.round(pb_sobel_x_float).astype(np.int) + pb_sobel_y_int = np.round(pb_sobel_y_float).astype(np.int) + + div_x_min = np.min(pb_sobel_x_int) + div_x_max = np.max(pb_sobel_x_int) + div_y_min = np.min(pb_sobel_y_int) + div_y_max = np.max(pb_sobel_y_int) + + print("div_x_min: {}".format(div_x_min)) + print("div_x_max: {}".format(div_x_max)) + print("div_y_min: {}".format(div_y_min)) + print("div_y_max: {}".format(div_y_max)) + + y_size = div_y_max-div_y_min+1 + x_size = div_x_max-div_x_min+1 + + arr_direction_template = np.zeros((y_size, x_size), dtype=np.int) + arr_magnitude_template = np.zeros((y_size, x_size), dtype=np.int) + + for j, dy in enumerate(range(div_y_min, div_y_max+1), 0): + print("dy: {}".format(dy)) + for i, dx in enumerate(range(div_x_min, div_x_max+1), 0): + direction = 0 + magnitude = 0 + + abs_dy = abs(dy) + abs_dx = abs(dx) + + if dy==0 and dx==0: + continue + elif dy==0: + if dx>0: + angle = 0 + else: + angle = 180 + elif dx==0: + if dy>0: + angle = 90 + else: + angle = 270 + elif dy>0 and dx>=0: + if abs_dy < abs_dx: + angle = 0+np.arctan(abs_dy/abs_dx)*180/3.141592654 + else: + angle = 90-np.arctan(abs_dx/abs_dy)*180/3.141592654 + elif dy>=0 and dx<0: + if abs_dy > abs_dx: + angle = 90+np.arctan(abs_dx/abs_dy)*180/3.141592654 + else: + angle = 180-np.arctan(abs_dy/abs_dx)*180/3.141592654 + elif dy<0 and dx<=0: + if abs_dy < abs_dx: + angle = 180+np.arctan(abs_dy/abs_dx)*180/3.141592654 + else: + angle = 270-np.arctan(abs_dx/abs_dy)*180/3.141592654 + elif dy<=0 and dx>0: + if abs_dy > abs_dx: + angle = 270+np.arctan(abs_dx/abs_dy)*180/3.141592654 + else: + angle = 360-np.arctan(abs_dy/abs_dx)*180/3.141592654 + + if angle>=22.5*1 and angle<22.5*3: + direction = 2 + elif angle>=22.5*3 and angle<22.5*5: + direction = 3 + elif angle>=22.5*5 and angle<22.5*7: + direction = 4 + elif angle>=22.5*7 and angle<22.5*9: + direction = 5 + elif angle>=22.5*9 and angle<22.5*11: + direction = 6 + elif angle>=22.5*11 and angle<22.5*13: + direction = 7 + elif angle>=22.5*13 and angle<22.5*15: + direction = 8 + else: + direction = 1 + + magnitude = int(np.sqrt(dy**2 + dx**2)) + + arr_direction_template[j, i] = direction + arr_magnitude_template[j, i] = magnitude + + pix_dir = arr_direction_colors[arr_direction_template] + img_dir = Image.fromarray(pix_dir) + img_dir.save(dir_path+'img_template_direction_border_amount_{}.png'.format(border_amount)) + + + pix_mag = (arr_magnitude_template.astype(np.float) / np.max(arr_magnitude_template) * 255.999).astype(np.int).astype(np.uint8) + img_mag = Image.fromarray(pix_mag) + img_mag.save(dir_path+'img_tempalte_magnitude_border_amount_{}.png'.format(border_amount)) + + print("border_amount: {}".format(border_amount)) + print("- div_x_min: {}".format(div_x_min)) + print("- div_x_max: {}".format(div_x_max)) + print("- div_y_min: {}".format(div_y_min)) + print("- div_y_max: {}".format(div_y_max)) + + + def calc_direction_magnitude(dy, dx): + return arr_direction_template[dy-div_y_min-1, dx-div_x_min-1], arr_magnitude_template[dy-div_y_min-1, dx-div_x_min-1] + + + nfunc = np.frompyfunc(calc_direction_magnitude, 2, 2) + arr_direction, arr_magnitude = nfunc(pb_sobel_y_int, pb_sobel_x_int) + + return pb_sobel_x_int, pb_sobel_y_int, arr_direction.astype(np.int), arr_magnitude.astype(np.int) + + +def gauss_blur(pix, border_size): + pix_int = pix.astype(int) + pix_int_border_h = np.hstack((pix_int[:, :1], )*border_size+(pix_int, )+(pix_int[:, -1:], )*border_size) + pix_int_border = np.vstack((pix_int_border_h[:1, :], )*border_size+(pix_int_border_h, )+(pix_int_border_h[-1:, :], )*border_size) + + kernel = gkern(kernlen=border_size*2+1, nsig=2) + kernel_int = (kernel / np.min(kernel)).astype(np.int) + + h, w = pix_int.shape + + pix_int_sum = np.zeros(pix_int.shape, dtype=np.int) + for j, arr_row in enumerate(kernel_int, 0): + for i, v in enumerate(arr_row, 0): + pix_int_sum += v*pix_int_border[j:j+h, i:i+w] + pix_gauss = (pix_int_sum.astype(np.float)/np.sum(kernel_int)*(255.999/255.)).astype(np.int).astype(np.uint8) + return pix_gauss + + if __name__ == '__main__': DIR_IMAGES = PATH_ROOT_DIR+'images/' if not os.path.exists(DIR_IMAGES): @@ -28,16 +221,49 @@ FILE_PATH_ORIG = DIR_IMAGES+FILE_NAME_JPG - FILE_NAME_PNG = re.sub(r'\.jpg$', '', FILE_NAME_JPG)+'.png' - FILE_PATH_PNG = DIR_IMAGES+FILE_NAME_PNG + assert os.path.exists(FILE_PATH_ORIG) + + DIR_PATH_ORIG = FILE_PATH_ORIG[:FILE_PATH_ORIG.rfind('/')+1] + img = Image.open(FILE_PATH_ORIG) + + resize = 3 + w, h = img.size + img = img.resize((w*resize, h*resize), resample=Image.LANCZOS) + + FILE_NAME_PNG = re.sub(r'\.png$', '', FILE_NAME_JPG)+'_resize_{}.png'.format(resize) + # FILE_NAME_PNG = re.sub(r'\.jpg$', '', FILE_NAME_JPG)+'_resize_{}.png'.format(resize) + FILE_PATH_PNG = DIR_IMAGES+FILE_NAME_PNG if not os.path.exists(FILE_PATH_PNG): img.save(FILE_PATH_PNG) - pix = np.array(img) + pix_orig = np.array(img) + pix = pix_orig.copy() + h, w, d = pix.shape + + u, c = np.unique(pix_orig.reshape((-1, )).view('u1,u1,u1'), return_counts=True) + u_mat = u.view('u1').reshape((-1, d)) + u_sum = np.sum(u_mat.astype('i8')**2, axis=1) + + uc_comb = np.empty((u.shape[0], ), dtype='i8,i8,u1,u1,u1') + uc_comb['f0'] = c + uc_comb['f1'] = u_sum + uc_comb['f2'] = u_mat[:, 0] + uc_comb['f3'] = u_mat[:, 1] + uc_comb['f4'] = u_mat[:, 2] + + uc_comb_sort = np.sort(uc_comb) + + sys.exit() + + img_gray = img.convert('L') + img_gray.save(DIR_IMAGES+FILE_NAME_PNG.replace('.png', '_grayscale.png')) + + pix_gray = np.array(img_gray) print("pix.shape: {}".format(pix.shape)) + def create_hist_256(u, c): assert u.dtype == np.dtype('uint8') c_new = np.zeros((256, ), dtype=c.dtype) @@ -51,11 +277,16 @@ def create_hist_256(u, c): ys_g = l_c[1] ys_b = l_c[2] - fig, axs = plt.subplots(nrows=3, ncols=1) - axs[0].bar(xs, ys_r, color='#FF0000', width=1.) - axs[1].bar(xs, ys_g, color='#00FF00', width=1.) - axs[2].bar(xs, ys_b, color='#0000FF', width=1.) - plt.show(block=False) + # fig, axs = plt.subplots(nrows=3, ncols=1) + # fig.set_title('Histogram of image') + # axs[0].bar(xs, ys_r, color='#FF0000', width=1.) + # axs[1].bar(xs, ys_g, color='#00FF00', width=1.) + # axs[2].bar(xs, ys_b, color='#0000FF', width=1.) + # plt.show(block=False) + + + pix_all = np.zeros((8, h, w, d), dtype=np.uint8) + pix_all[7, ...] = pix # create simple rgb reduces images! for bit in range(1, 8): @@ -78,5 +309,109 @@ def create_hist_256(u, c): pix2[..., 0] = arr_map[pix[..., 0]] pix2[..., 1] = arr_map[pix[..., 1]] pix2[..., 2] = arr_map[pix[..., 2]] + + pix_all[bit-1, ...] = pix2 + img2 = Image.fromarray(pix2) img2.save(DIR_IMAGES+FILE_NAME_PNG.replace('.png', '_c{}b{}.png'.format(pix2.shape[2], bit))) + + + bits = 2 + pix = pix_all[bits-1] + img = Image.fromarray(pix) + img_gray = img.convert('L') + img_gray.save(DIR_IMAGES+FILE_NAME_PNG.replace('.png', '_grayscale_c1b{}.png'.format(bits))) + + pix_gray = np.array(img_gray) + + + border_amount = 1 + + pb_sobel_x_int, pb_sobel_y_int, arr_direction, arr_magnitude = create_sobel_image_derivative(pix_gray, border_amount, dir_path=DIR_PATH_ORIG) + + max_abs_v_x = np.max(np.abs(pb_sobel_x_int)) + pix_sobel_x = (pb_sobel_x_int.astype(np.float)*127.999/max_abs_v_x+128).astype(np.int).astype(np.uint8) + + max_abs_v_y = np.max(np.abs(pb_sobel_y_int)) + pix_sobel_y = (pb_sobel_y_int.astype(np.float)*127.999/max_abs_v_y+128).astype(np.int).astype(np.uint8) + + pb_sobel_int = np.sqrt(pb_sobel_x_int**2+pb_sobel_y_int**2) + max_abs_v_sobel = np.max(np.abs(pb_sobel_int)) + pix_sobel = (pb_sobel_y_int.astype(np.float)*255.999/max_abs_v_sobel).astype(np.int).astype(np.uint8) + + pix_dir = arr_direction_colors[arr_direction] + img_dir = Image.fromarray(pix_dir) + img_dir.save(DIR_PATH_ORIG+'img_direction_border_amount_{:02}.png'.format(border_amount)) + + # max_abs_v_mag = np.max(np.abs(arr_magnitude)) + # pix_sobel_mag = (pb_sobel_y_int.astype(np.float)*255.999/max_abs_v_mag).astype(np.int).astype(np.uint8) + + img_sobel_x = Image.fromarray(pix_sobel_x) + img_sobel_y = Image.fromarray(pix_sobel_y) + img_sobel = Image.fromarray(pix_sobel) + # img_sobel_mag = Image.fromarray(pix_sobel_mag) + + # print('save img_sobel_x') + # img_sobel_x.save(DIR_PATH_ORIG+'img_sobel_x.png') + # print('save img_sobel_y') + # img_sobel_y.save(DIR_PATH_ORIG+'img_sobel_y.png') + print('save img_sobel') + img_sobel.save(DIR_PATH_ORIG+'img_sobel.png') + + + # border_size_gauss = 2 + d_pix_gauss_blur = {} + d_pix_gauss_blur_sobel = {} + for border_size_gauss in range(5, 16): + # for border_size_gauss in range(1, 5): + # for border_size_gauss in range(1, 4): + print("border_size_gauss: {}".format(border_size_gauss)) + + pix_gauss_blur = gauss_blur(pix_gray, border_size_gauss) + + + img_gauss_blur = Image.fromarray(pix_gauss_blur) + + img_gauss_blur.save(DIR_IMAGES+FILE_NAME_PNG.replace('.png', '_gauss_blur_border_size_{:02}.png'.format(border_size_gauss))) + + pb_sobel_x_int, pb_sobel_y_int, arr_direction, arr_magnitude = create_sobel_image_derivative(pix_gauss_blur, border_amount, dir_path=DIR_PATH_ORIG) + + max_abs_v_x = np.max(np.abs(pb_sobel_x_int)) + pix_sobel_x = (pb_sobel_x_int.astype(np.float)*127.999/max_abs_v_x+128).astype(np.int).astype(np.uint8) + + max_abs_v_y = np.max(np.abs(pb_sobel_y_int)) + pix_sobel_y = (pb_sobel_y_int.astype(np.float)*127.999/max_abs_v_y+128).astype(np.int).astype(np.uint8) + + pb_sobel_int = np.sqrt(pb_sobel_x_int**2+pb_sobel_y_int**2) + max_abs_v_sobel = np.max(np.abs(pb_sobel_int)) + pix_sobel = (pb_sobel_y_int.astype(np.float)*255.999/max_abs_v_sobel).astype(np.int).astype(np.uint8) + + d_pix_gauss_blur[border_size_gauss] = pix_gauss_blur + d_pix_gauss_blur_sobel[border_size_gauss] = pix_sobel + + pix_dir = arr_direction_colors[arr_direction] + img_dir = Image.fromarray(pix_dir) + img_dir.save(DIR_PATH_ORIG+'img_direction_border_amount_{:02}_border_size_gauss_{:02}.png'.format(border_amount, border_size_gauss)) + + # max_abs_v_mag = np.max(np.abs(arr_magnitude)) + # pix_sobel_mag = (pb_sobel_y_int.astype(np.float)*255.999/max_abs_v_mag).astype(np.int).astype(np.uint8) + + img_sobel_x = Image.fromarray(pix_sobel_x) + img_sobel_y = Image.fromarray(pix_sobel_y) + img_sobel = Image.fromarray(pix_sobel) + # img_sobel_mag = Image.fromarray(pix_sobel_mag) + + # print('save img_gauss_blur_sobel_x') + # img_sobel_x.save(DIR_PATH_ORIG+'img_gauss_blur_border_size_{:02}_sobel_x.png'.format(border_size_gauss)) + # print('save img_gauss_blur_sobel_y') + # img_sobel_y.save(DIR_PATH_ORIG+'img_gauss_blur_border_size_{:02}_sobel_y.png'.format(border_size_gauss)) + print('save img_gauss_blur_sobel') + img_sobel.save(DIR_PATH_ORIG+'img_gauss_blur_border_size_{:02}_sobel.png'.format(border_size_gauss)) + + for threashold in [64, 128, 196]: + idxs_contur = d_pix_gauss_blur_sobel[border_size_gauss]>=threashold + pix_ = pix.copy() + pix_[idxs_contur] = (0x00, 0x00, 0x00) + img = Image.fromarray(pix_) + img.save(DIR_PATH_ORIG+'img_add_black_outlines_gauss_blur_size_{}_bits_{}_threshold_{:03}.png'.format(border_size_gauss, bits, threashold)) + # img.show() diff --git a/test_programs/check_different_map_datatypes.py b/test_programs/check_different_map_datatypes.py index 6bb6f55..dd47b5d 100755 --- a/test_programs/check_different_map_datatypes.py +++ b/test_programs/check_different_map_datatypes.py @@ -29,42 +29,88 @@ def func_timer(f, args): start_time = time.time() r = f(*args) end_time = time.time() - return r, end_time - start_time + return end_time - start_time, r if __name__ == '__main__': # create random arr nums with a dimension m and amount n - dimension = 3 - n = 10000 + dimension = 10 + n = 100000 assert 256**dimension >= n arr_nums = np.random.randint(0, 2**8, (n, dimension)) arr_nums_unique = np.unique(arr_nums.astype(np.uint8).reshape((-1, )).view(','.join(['u1']*dimension))) - arr_nums_view = np.empty((len(arr_nums_unique), ), dtype=object) - arr_nums_view[:] = arr_nums_unique + length = len(arr_nums_unique) + arr_vals = np.random.randint(0, 2**32, (length, )) + # df_uint8_view = pd.DataFrame(data={'val': arr_vals, 'nums': arr_nums_unique}, columns=['nums', 'val']) + # df_uint8_view_nums = df_uint8_view.set_index('nums') - arr_vals = np.random.randint(0, 2**32, (len(arr_nums_unique), )) - - arr_nums_uint8 = np.empty((len(arr_nums_unique), ), dtype=object) + arr_nums_uint8 = np.empty((length, ), dtype=object) arr_nums_uint8[:] = list(map(tuple, arr_nums_unique)) - df_uint8 = pd.DataFrame(data={'val': arr_vals, 'nums': arr_nums_uint8}, columns=['nums', 'val']) - # l_nums_uint8 = list(map(tuple, arr_nums_unique)) - # df_uint8 = pd.DataFrame(data={'val': arr_vals, 'nums': l_nums_uint8}, columns=['nums', 'val']) - df_uint8_nums = df_uint8.set_index('nums') + # df_uint8 = pd.DataFrame(data={'val': arr_vals, 'nums': arr_nums_uint8}, columns=['nums', 'val']) + # df_uint8_nums = df_uint8.set_index('nums') + df_uint8_nums = pd.Series(data=arr_vals, index=arr_nums_uint8) - arr_nums_int = np.empty((len(arr_nums_unique), ), dtype=object) + arr_nums_int = np.empty((length, ), dtype=object) arr_nums_int[:] = list(map(lambda x: tuple(map(int, x)), arr_nums_unique)) - df_int = pd.DataFrame(data={'val': arr_vals, 'nums': arr_nums_int}, columns=['nums', 'val']) - # l_nums_int = list(map(lambda x: tuple(map(int, x)), arr_nums_unique)) - # df_int = pd.DataFrame(data={'val': arr_vals, 'nums': l_nums_int}, columns=['nums', 'val']) - df_int_nums = df_int.set_index('nums') + # df_int = pd.DataFrame(data={'val': arr_vals, 'nums': arr_nums_int}, columns=['nums', 'val']) + # df_int_nums = df_int.set_index('nums') + df_int_nums = pd.Series(data=arr_vals, index=arr_nums_int) + + d_uint8 = {k: v for k, v in zip(arr_nums_uint8, arr_vals)} + d_int = {k: v for k, v in zip(arr_nums_int, arr_vals)} + + + def mapping_test_df_uint8(l_tpl_nums): + return df_uint8_nums.loc[l_tpl_nums].values + # return df_uint8_nums.loc[l_tpl_nums]['val'].values + + def mapping_test_df_int(l_tpl_nums): + return df_int_nums.loc[l_tpl_nums].values + # return df_int_nums.loc[l_tpl_nums]['val'].values + + def mapping_test_d_uint8(l_tpl_nums): + return [d_uint8[t] for t in l_tpl_nums] + + def mapping_test_d_int(l_tpl_nums): + return [d_int[t] for t in l_tpl_nums] - def mapping_1(arr_nums, arr_vals): - d = 4 + n_test = 300000 + arr_idxs = np.random.randint(0, length, (n_test, )) + + arr_idxs_vals = arr_vals[arr_idxs] + l_idxs_vals = arr_idxs_vals.tolist() + arr_nums_uint8_idxs = arr_nums_uint8[arr_idxs] + arr_nums_int_idxs = arr_nums_int[arr_idxs] + + print('Test df_uint8') + time_df_uint8, ret_l_idxs_vals_df_uint8 = func_timer(mapping_test_df_uint8, [arr_nums_uint8_idxs]) + print('Test df_int') + time_df_int, ret_l_idxs_vals_df_int = func_timer(mapping_test_df_int, [arr_nums_int_idxs]) + print('Test d_uint8') + time_d_uint8, ret_l_idxs_vals_d_uint8 = func_timer(mapping_test_d_uint8, [arr_nums_uint8_idxs]) + print('Test d_int') + time_d_int, ret_l_idxs_vals_d_int = func_timer(mapping_test_d_int, [arr_nums_int_idxs]) + + assert np.all(ret_l_idxs_vals_df_uint8 == arr_idxs_vals) + assert np.all(ret_l_idxs_vals_df_int == arr_idxs_vals) + assert ret_l_idxs_vals_d_uint8 == l_idxs_vals + assert ret_l_idxs_vals_d_int == l_idxs_vals + + print("time_df_uint8: {}".format(time_df_uint8)) + print("time_df_int: {}".format(time_df_int)) + print("time_d_uint8: {}".format(time_d_uint8)) + print("time_d_int: {}".format(time_d_int)) + + # l_nums_val_df_uint8 = [(arr_nums_uint8[i], arr_vals[i]) for i in arr_idxs] + # l_nums_val_df_int = [(arr_nums_int[i], arr_vals[i]) for i in arr_idxs] + + # l_nums_val_d_uint8 = [(arr_nums_uint8[i], arr_vals[i]) for i in arr_idxs] + # l_nums_val_d_int = [(arr_nums_int[i], arr_vals[i]) for i in arr_idxs] # def convert_num_to_8bit_array(num, dimension): # l = [] diff --git a/vectorize_programs/gpu_example.py b/vectorize_programs/gpu_example.py new file mode 100755 index 0000000..85a4493 --- /dev/null +++ b/vectorize_programs/gpu_example.py @@ -0,0 +1,47 @@ +#! /usr/bin/python3 + +# -*- coding: utf-8 -*- + +import numpy as np +from timeit import default_timer as timer +from numba import vectorize + +# This should be a substantially high value. On my test machine, this took +# 33 seconds to run via the CPU and just over 3 seconds on the GPU. +NUM_ELEMENTS = 100000000 + +# This is the CPU version. +def vector_add_cpu(a, b): + c = np.zeros(NUM_ELEMENTS, dtype=np.float32) + for i in range(NUM_ELEMENTS): + c[i] = a[i] + b[i] + return c + +# This is the GPU version. Note the @vectorize decorator. This tells +# numba to turn this into a GPU vectorized function. +@vectorize(["float32(float32, float32)"], target='cuda') +def vector_add_gpu(a, b): + return a + b; + +def main(): + a_source = np.ones(NUM_ELEMENTS, dtype=np.float32) + b_source = np.ones(NUM_ELEMENTS, dtype=np.float32) + + # Time the CPU function + start = timer() + vector_add_cpu(a_source, b_source) + vector_add_cpu_time = timer() - start + + # Time the GPU function + start = timer() + vector_add_gpu(a_source, b_source) + vector_add_gpu_time = timer() - start + + # Report times + print("CPU function took %f seconds." % vector_add_cpu_time) + print("GPU function took %f seconds." % vector_add_gpu_time) + + return 0 + +if __name__ == "__main__": + main()