Skip to content

Commit

Permalink
many more mini programs!
Browse files Browse the repository at this point in the history
  • Loading branch information
hearues-zueke-github committed Jan 12, 2021
1 parent ad9e76b commit a44611c
Show file tree
Hide file tree
Showing 7 changed files with 984 additions and 56 deletions.
298 changes: 298 additions & 0 deletions compress_enwiki8/analyse_compressions_method.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
#! /usr/bin/python3

# -*- coding: utf-8 -*-

# Some other needed imports
import datetime
import dill
import gzip
import os
import pdb
import re
import io
import sys
import traceback

import numpy as np
import pandas as pd

from typing import List, Dict, Any, Callable, Set

from copy import deepcopy, copy
from dotmap import DotMap
from functools import reduce
from pathlib import Path
from memory_tempfile import MemoryTempfile
from shutil import copyfile
from collections import defaultdict
from pprint import pprint

import matplotlib.pyplot as plt

sys.path.append('..')
from utils import mkdirs

PATH_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))+"/"
HOME_DIR = os.path.expanduser("~")+"/"
TEMP_DIR = MemoryTempfile().gettempdir()+"/"

OBJS_DIR_PATH = PATH_ROOT_DIR+'objs/'
mkdirs(OBJS_DIR_PATH)

def main(d_env: Dict[str, Any]) -> None:
    """Populate the shared environment dict *d_env* with analysis objects.

    At the moment only the key ``'d_chunck_size_d_xy'`` is set, and it is
    set to an empty dict.

    Args:
        d_env: Mutable mapping that collects the named objects; it is
            modified in place.
    """

    def load_obj_in_d_env(obj_name: str, func: Callable[[Dict[str, Any]], Any], d_env: Dict[str, Any]) -> None:
        """Store the object called *obj_name* in *d_env*, using a disk cache.

        The cache file is ``OBJS_DIR_PATH + obj_name + '.pkl.gz'`` (a gzip
        compressed dill dump).  When the file exists it is loaded; otherwise
        ``func(d_env)`` creates the object, which is then written to that
        file.  This helper is currently not called by ``main``.
        """
        file_path_obj = OBJS_DIR_PATH + obj_name + '.pkl.gz'

        if os.path.exists(file_path_obj):
            print("Loading '{}' object.".format(obj_name))
            with gzip.open(file_path_obj, 'rb') as f_in:
                obj = dill.load(f_in)
        else:
            print("Creating '{}' object.".format(obj_name))
            obj = func(d_env)
            with gzip.open(file_path_obj, 'wb') as f_out:
                dill.dump(obj, f_out)

        d_env[obj_name] = obj

    def func_obj_d_chunck_size_d_xy(d_env: Dict[str, Any]) -> Dict[int, Any]:
        """Build the (currently empty) chunk-size -> x/y data mapping."""
        return {}

    d_env['d_chunck_size_d_xy'] = func_obj_d_chunck_size_d_xy(d_env)

# for

# def func_obj_d_df(d_env: Dict[str, Any]) -> Any:
# d_wb_d_df = d_env['d_wb_d_df']
# df_merge = create_df_merge_from_basal_bolus(d_wb_d_df)
# d_df = create_d_df_base(d_wb_d_df, df_merge)

# return d_df


def base_convert_b1_to_b2(l1, b1, b2):
    """Convert a digit list from base *b1* to base *b2*.

    Args:
        l1: Digits of the number in base ``b1``, most significant first.
        b1: Source base.
        b2: Target base; must be at least 2.

    Returns:
        The digits of the same number in base ``b2``, most significant
        first.  The value zero (including an empty ``l1``) is returned as
        an empty list.

    Raises:
        ValueError: If ``b2`` is smaller than 2.  Without this check
            ``b2 == 1`` would never terminate and ``b2 == 0`` would fail
            with a ``ZeroDivisionError``.
    """
    if b2 < 2:
        raise ValueError("target base b2 must be >= 2, got {}".format(b2))

    # Horner's scheme: fold the digits into one integer, most significant
    # digit first.
    n = 0
    for v in l1:
        n = n * b1 + v

    # Peel off base-b2 digits, least significant first.
    l2 = []
    while n > 0:
        n, digit = divmod(n, b2)
        l2.append(digit)

    return l2[::-1]


def _gzip_size_in_tempfile(data: bytes) -> int:
    """Return the size in bytes of *data* after gzip compression (level 9).

    The compressed stream is written to a temporary file obtained from
    ``MemoryTempfile`` and the file is closed again before returning.
    """
    with MemoryTempfile().TemporaryFile() as f_out:
        with gzip.open(f_out, mode='wb', compresslevel=9) as f:
            f.write(data)
        # The gzip stream is only complete once the GzipFile is closed, so
        # the size is measured after leaving the inner ``with``.
        f_out.seek(0, os.SEEK_END)
        return f_out.tell()


if __name__ == '__main__':
    # d_env: Dict[str, Any] = {}
    # main(d_env=d_env)

    # NOTE: the script stops here. Every experiment below stays disabled
    # until this early exit (and the later ones) are removed.
    sys.exit()

    # ------------------------------------------------------------------
    # Experiment 1: frequency table of all byte tuples of length
    # `byte_length` in enwik8, stored as a gzip'd dill dump.
    # ------------------------------------------------------------------
    tmp_folder = TEMP_DIR + 'content_tpl/'
    mkdirs(tmp_folder)

    byte_length = 1

    file_path_l_sorted = tmp_folder + 'l_sorted_byte_len_{}.pkl.gz'.format(byte_length)

    with open(HOME_DIR+'Downloads/enwik8', 'rb') as f:
        content = f.read()

    # Collected by experiment 3 at the end of the script.
    l_column = ['chunck_size', 'folder_size_content']
    d_stats = {s: [] for s in l_column}

    # content = content[:1000000]
    # Iterating over bytes already yields ints.
    content_tpl = tuple(content)

    d = defaultdict(int)
    for i in range(0, len(content_tpl)-byte_length+1):
        d[content_tpl[i:i+byte_length]] += 1

    # Entries are (count, hex string of the bytes, bytes as characters).
    l_count_hexstr = [
        (v, ''.join(["{:02X}".format(i) for i in k]), ''.join([chr(i) for i in k]))
        for k, v in d.items()
    ]
    l_sorted = sorted(l_count_hexstr, reverse=False)
    print("l_sorted:")
    pprint(l_sorted[-200:])

    with gzip.open(file_path_l_sorted, 'wb') as f:
        dill.dump(l_sorted, f)

    sys.exit()

    # ------------------------------------------------------------------
    # Experiment 2: compressed size of a sliding window over the content,
    # plotted per chunk size.
    # ------------------------------------------------------------------
    l_chunck_size = [
        # 1000, 2000, 5000,
        # 10000, 20000, 50000,
        100000, 200000, 500000,
        # 1000000, 2000000, 5000000,
    ]

    d_chunck_size_d_xy = {}

    chunck_jump = 1000
    for chunck_size in l_chunck_size:
        l_x = []
        l_y = []
        d_chunck_size_d_xy[chunck_size] = {
            'x': l_x,
            'y': l_y,
        }
        for pos in range(0, len(content)-chunck_size+1, chunck_jump):
            print("chunck_size: {}, pos: {}".format(chunck_size, pos))
            content_part = content[pos:pos+chunck_size]
            assert len(content_part) == chunck_size

            l_x.append(pos)
            l_y.append(_gzip_size_in_tempfile(content_part))

    # One subplot row per chunk size. squeeze=False keeps `axs` a 2D array
    # even when only one chunk size is active.
    fig, axs = plt.subplots(figsize=(15, 9), nrows=len(l_chunck_size), ncols=1, squeeze=False)
    fig.suptitle('Plot for diff chunck size compressions', fontsize=14)

    for ax, chunck_size in zip(axs[:, 0], l_chunck_size):
        d_xy = d_chunck_size_d_xy[chunck_size]
        ax.plot(d_xy['x'], d_xy['y'], marker='o', ms=5, color='#0000FFFF')
        ax.set_title('chunck_size: {}'.format(chunck_size))
        ax.set_ylim([0, chunck_size])
        ax.set_xlabel('Left position of chunck')
        ax.set_ylabel('Bytes')
    plt.tight_layout()

    fig, axs = plt.subplots(figsize=(15, 9), nrows=len(l_chunck_size), ncols=1, squeeze=False)
    fig.suptitle('Plot for diff chunck size compressions (minimal points)', fontsize=14)

    for ax, chunck_size in zip(axs[:, 0], l_chunck_size):
        d_xy = d_chunck_size_d_xy[chunck_size]
        arr_x = np.array(d_xy['x'])
        arr_y = np.array(d_xy['y'])

        # Keep only strict local minima: points lower than both neighbours.
        # The first and last point have only one neighbour and are dropped.
        idxs = np.zeros(arr_y.shape, dtype=bool)
        idxs[1:-1] = (arr_y[1:-1] < arr_y[:-2]) & (arr_y[1:-1] < arr_y[2:])
        arr_x = arr_x[idxs]
        arr_y = arr_y[idxs]

        ax.plot(arr_x, arr_y, marker='o', ms=5, color='#0000FFFF')
        ax.set_title('chunck_size: {}'.format(chunck_size))
        ax.set_ylim([0, chunck_size])
        ax.set_xlabel('Left position of chunck')
        ax.set_ylabel('Bytes')
    plt.tight_layout()

    plt.show()

    sys.exit(0)

    # ------------------------------------------------------------------
    # Experiment 3: total compressed size when the content is split into
    # consecutive chunks that are compressed independently.
    # ------------------------------------------------------------------
    for chunck_size in l_chunck_size:
        folder_size_content = 0
        for idx, pos in enumerate(range(0, len(content), chunck_size), 0):
            print("idx: {}, pos: {}".format(idx, pos))
            content_part = content[pos:pos+chunck_size]
            folder_size_content += _gzip_size_in_tempfile(content_part)

        print("folder_size_content: {}".format(folder_size_content))

        d_stats['chunck_size'].append(chunck_size)
        d_stats['folder_size_content'].append(folder_size_content)

    df = pd.DataFrame(data=d_stats, columns=l_column)
    print("df:\n{}".format(df))

# {'chunck_size': [1000, 2000, 5000, 10000], 'folder_size_content': [560052, 499507, 448302, 420957]}

# with io.BytesIO:
# chunck_size folder_size_content
# 0 1000 544052
# 1 2000 491507
# 2 5000 445102
# 3 10000 419357

# with get_file_size:
# chunck_size folder_size_content
# 0 1000 560052
# 1 2000 499507
# 2 5000 448302
# 3 10000 420957


50 changes: 17 additions & 33 deletions game_of_life/bit_automaton.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
import numpy as np
from types import FunctionType

def copy_function(f, d_glob=None):
    """Return a copy of function *f* that uses *d_glob* as its globals.

    The copy shares ``f``'s code object, name, positional defaults and
    closure; only the global namespace is swapped.

    Args:
        f: The function to copy.
        d_glob: Dict used as the copy's global namespace.  It is used as
            is (not copied), so later changes to the dict are visible to
            the returned function.  When omitted, a fresh empty dict is
            created for every call, so copies never share a namespace by
            accident.

    Returns:
        The new function object.
    """
    if d_glob is None:
        d_glob = {}
    return FunctionType(f.__code__, d_glob, f.__name__, f.__defaults__, f.__closure__)

class BitAutomaton(Exception):
__slot__ = [
'h', 'w',
'frame', 'frame_wrap',
'field_size', 'field',
'field_frame_size', 'field_frame',
'd_vars', 'd_func', 'funcs_str',
'l_func', 's_func_nr', 'l_func_name',
'd_vars',
'l_func', 'func_rng', 's_func_nr',
]

def __init__(self, h, w, frame, frame_wrap, funcs_str):
def __init__(self, h, w, frame, frame_wrap, l_func=None, func_rng=None):
self.h = h
self.w = w

self.frame = frame
self.frame_wrap = frame_wrap

self.field_size = (h, w)
self.field = np.zeros(self.field_size, dtype=np.uint8)
self.field = np.zeros(self.field_size, dtype=np.bool)

self.field_frame_size = (h+frame*2, w+frame*2)
self.field_frame = np.zeros(self.field_frame_size, dtype=np.uint8)
self.field_frame = np.zeros(self.field_frame_size, dtype=np.bool)

self.d_vars = {}
self.d_vars['n'] = self.field_frame[frame:-frame, frame:-frame]
Expand All @@ -43,37 +47,17 @@ def __init__(self, h, w, frame, frame_wrap, funcs_str):
self.d_vars[direction_y+str(amount_y)+direction_x+str(amount_x)] = self.field_frame[frame+i_y:frame+i_y+h, frame+i_x:frame+i_x+w]
self.d_vars[direction_y*amount_y+direction_x*amount_x] = self.field_frame[frame+i_y:frame+i_y+h, frame+i_x:frame+i_x+w]

self.d_func = {}

self.funcs_str = funcs_str

exec(funcs_str, self.d_vars, self.d_func)

assert 'rng' in self.d_func.keys()

# test if each function starting with 'fun_' is returning a boolean array!
self.l_func_name = []
for func_name in self.d_func.keys():
if func_name[:4] != 'fun_':
continue

self.l_func_name.append(func_name)

# print("func_name: {}".format(func_name))
v = self.d_func[func_name]()
assert v.dtype == np.bool
assert v.shape == self.field_size

# check, if every function name is appearing starting from 0 to upwards in ascending order!
assert np.all(np.diff(np.sort([int(v.replace('fun_', '')) for v in self.l_func_name])) == 1)

self.l_func = [self.d_func[func_name] for func_name in self.l_func_name]
self.s_func_nr = {i for i in range(0, len(self.l_func_name))}
if l_func is not None:
self.l_func = [copy_function(f, self.d_vars) for f in l_func]
self.s_func_nr = set(range(0, len(l_func)))
if func_rng is not None:
self.func_rng = copy_function(func_rng, self.d_vars)


def set_field(self, field):
assert isinstance(field, np.ndarray)
assert field.shape == self.field_size
assert np.all((field==0)|(field==1))
assert field.dtype == np.bool
self.field = field
self.fill_field_frame()

Expand All @@ -99,7 +83,7 @@ def fill_field_frame(self):

def execute_func(self, n):
assert n in self.s_func_nr
self.field = (self.l_func[n]()).astype(np.uint8)
self.field = self.l_func[n]()
self.fill_field_frame()


Expand Down
Loading

0 comments on commit a44611c

Please sign in to comment.