Skip to content

Commit

Permalink
Merge pull request #36 from masakistan/save
Browse files Browse the repository at this point in the history
make values in dictionary opaque
  • Loading branch information
masakistan authored Feb 10, 2020
2 parents cb9ef34 + 79b606d commit 2e3d941
Show file tree
Hide file tree
Showing 15 changed files with 337 additions and 34 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ kcollections.egg-info/
*.so
*.pyc
*.dylib
*.fa
9 changes: 7 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ project(kcollections)
#set(BOOST_REQUESTED_VERSION 1.72.0)
#set(BOOST_ROOT_DIR ${CMAKE_SOURCE_DIR}/libs/boost-${BOOST_REQUESTED_VERSION})
#list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
find_package(Boost ${BOOST_REQUESTED_VERSION} REQUIRED COMPONENTS serialization)
find_package(Boost 1.65.1 ${BOOST_REQUESTED_VERSION} REQUIRED COMPONENTS serialization)
message("boost libs: ${Boost_LIBRARIES} ${Boost_INCLUDE_DIRS}")

# The version number.
Expand Down Expand Up @@ -128,7 +128,12 @@ else()
add_executable(setsavetest "${SOURCE_DIR}/TestSetSave.cpp" $<TARGET_OBJECTS:kcollections_set_objs>)
target_link_libraries(setsavetest Threads::Threads Kset)
set_target_properties(setsavetest PROPERTIES COMPILE_DEFINITIONS "KSET")


# Testing Kdict save
add_executable(dictsavetest "${SOURCE_DIR}/TestDictSave.cpp" $<TARGET_OBJECTS:kcollections_dict_objs>)
target_link_libraries(dictsavetest Threads::Threads Kset)
set_target_properties(dictsavetest PROPERTIES COMPILE_DEFINITIONS "KDICT")

# Testing Kcounter save
add_executable(countersavetest "${SOURCE_DIR}/TestCounterSave.cpp" $<TARGET_OBJECTS:kcollections_counter_objs>)
target_link_libraries(countersavetest Threads::Threads Kset)
Expand Down
8 changes: 6 additions & 2 deletions inc/Kcontainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,24 @@
#include <stdlib.h>
#include <string>
#include <stdexcept>
#include "Vertex.h"
#include "helper.h"
//#include <jemalloc/jemalloc.h>
#include <math.h>
#include <functional>
#include "globals.h"
#include "helper.h"
#include "Vertex.h"


#if defined(PYTHON)
#include <pybind11/pybind11.h>
namespace py = pybind11;
#endif

#if defined(KDICT) || defined(KCOUNTER)
template <class T>
#endif


struct ThreadGlobals {
#if defined(KSET)
std::vector<std::vector<std::vector<uint8_t*>>>* kmers;
Expand Down
1 change: 1 addition & 0 deletions inc/Kcounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>

#include "globals.h"
#include "Kcontainer.h"

class Kcounter
Expand Down
12 changes: 7 additions & 5 deletions inc/Kdict.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>

#include "globals.h"
#include "Kcontainer.h"
#include "opaque.h"

template <class T>
class Kdict
Expand Down Expand Up @@ -70,12 +72,12 @@ class Kdict
this->merge_func = merge_func;
}

void add(char* kmer, T& obj) {
void add(const char* kmer, T obj) {
CHECK_KMER_LENGTH(kmer, m_k, "Kdict");
kc->kcontainer_add(kmer, obj, overwrite_merge_func);
}

bool contains(char* kmer) {
bool contains(const char* kmer) {
CHECK_KMER_LENGTH(kmer, m_k, "Kdict");
return kc->kcontainer_contains(kmer);
}
Expand All @@ -89,12 +91,12 @@ class Kdict
return kc->kcontainer_size();
}

void remove(char* kmer) {
void remove(const char* kmer) {
CHECK_KMER_LENGTH(kmer, m_k, "Kdict");
kc->kcontainer_remove(kmer);
}

T get(char* kmer) {
T& get(const char* kmer) {
CHECK_KMER_LENGTH(kmer, m_k, "Kdict");
return kc->kcontainer_get(kmer);
}
Expand Down Expand Up @@ -144,7 +146,7 @@ class Kdict
kc->parallel_kcontainer_add_init(threads, merge_func);
}

void parallel_add(const char* kmer, T& value) {
void parallel_add(const char* kmer, T value) {
kc->parallel_kcontainer_add(kmer, value);
}

Expand Down
2 changes: 1 addition & 1 deletion inc/Kset.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include "globals.h"

#include "uint256_t.h"
#include "Kcontainer.h"
#include "globals.h"
#include "helper.h"

class Kset
Expand Down
1 change: 1 addition & 0 deletions inc/UContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class UC {
}
#else
ar & objs;

suffixes = (uint8_t*) calloc(objs.size() * CDEPTH, sizeof(uint8_t));
for(size_t i = 0; i < objs.size() * CDEPTH; i++) {
ar & suffixes[i];
Expand Down
2 changes: 1 addition & 1 deletion inc/Vertex.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
#include <functional>

#include <boost/serialization/split_member.hpp>
#include "globals.h"

#include "UContainer.h"
#include "globals.h"
//#include <jemalloc/jemalloc.h>
#include "uint256_t.h"
#include "uint128_t.h"
Expand Down
4 changes: 3 additions & 1 deletion inc/globals.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once

#include <stdint.h>

//#include <list>
#define CAPACITY 4096
#define NHASHES 12
#define HASHSIZE 512 // HASHSIZE % 32 must be 0
Expand All @@ -11,3 +11,5 @@ typedef int count_dtype;
#define MAXCOUNT UINT16_MAX

extern int CDEPTH;


40 changes: 40 additions & 0 deletions inc/opaque.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#pragma once

// Opaque-type declarations used by the Python bindings of Kdict.
//
// Every PYBIND11_MAKE_OPAQUE(T) line marks the container type T as opaque for
// pybind11 (see the pybind11 documentation on opaque types for the exact
// semantics). The declarations are compiled only when both KDICT and PYTHON
// are defined; otherwise this header expands to nothing.
//
// NOTE(review): each macro defines a type_caster specialization, so the header
// must not be expanded twice in one translation unit -- hence the guard above.
#if defined(KDICT) && defined(PYTHON)
#include <set>
#include <string>   // std::string is named in the declarations below
#include <vector>

#include <pybind11/stl_bind.h>

// Flat vectors of the supported value types.
PYBIND11_MAKE_OPAQUE(std::vector<int>);
PYBIND11_MAKE_OPAQUE(std::vector<float>);
PYBIND11_MAKE_OPAQUE(std::vector<bool>);
PYBIND11_MAKE_OPAQUE(std::vector<std::string>);
PYBIND11_MAKE_OPAQUE(std::vector<pybind11::object>);

// Vectors of vectors of the supported value types.
PYBIND11_MAKE_OPAQUE(std::vector<std::vector<int>>);
PYBIND11_MAKE_OPAQUE(std::vector<std::vector<float>>);
PYBIND11_MAKE_OPAQUE(std::vector<std::vector<bool>>);
PYBIND11_MAKE_OPAQUE(std::vector<std::vector<std::string>>);
PYBIND11_MAKE_OPAQUE(std::vector<std::vector<pybind11::object>>);

// Disabled: std::list variants. Re-enabling them also requires #include <list>.
/*
PYBIND11_MAKE_OPAQUE(std::list<int>);
PYBIND11_MAKE_OPAQUE(std::list<float>);
PYBIND11_MAKE_OPAQUE(std::list<bool>);
PYBIND11_MAKE_OPAQUE(std::list<std::string>);
PYBIND11_MAKE_OPAQUE(std::list<py::object>);
PYBIND11_MAKE_OPAQUE(std::list<std::list<int>>);
PYBIND11_MAKE_OPAQUE(std::list<std::list<float>>);
PYBIND11_MAKE_OPAQUE(std::list<std::list<bool>>);
PYBIND11_MAKE_OPAQUE(std::list<std::list<std::string>>);
PYBIND11_MAKE_OPAQUE(std::list<std::list<py::object>>);
*/
// Disabled: std::set variants.
/*
PYBIND11_MAKE_OPAQUE(std::set<int>);
PYBIND11_MAKE_OPAQUE(std::set<float>);
PYBIND11_MAKE_OPAQUE(std::set<bool>);
PYBIND11_MAKE_OPAQUE(std::set<std::string>);
//PYBIND11_MAKE_OPAQUE(std::set<pybind11::object>);
*/

#endif

114 changes: 104 additions & 10 deletions kcollections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,75 @@

def create_kdict(base):
class tkdict(base):
def __init__(self, k=0):
def __init__(self, k=0, caster=None, seq_caster=None, rcaster=None):
super(tkdict, self).__init__(k)
self.caster = caster
self.rcaster = rcaster
self.seq_caster = seq_caster

def __str__( self ):
res = []
for key, val in self.items():
res.append( key + ':' + str( val ) )
return '{' + ','.join( res ) + '}'

'''
def __getitem__(self, key):
if key in self:
val = super(tkdict, self).__getitem__(key)
print(self.rcaster)
try:
return self.rcaster(val)
except:
return val
else:
raise KeyError("kmer {} not in kdict".format(key))
'''
def __setitem__(self, key, val):
    """Store ``val`` under ``key``.

    If ``self.caster`` is not ``None`` the value is passed through it first;
    the (possibly converted) value is then handed to the parent class.
    """
    stored = val if self.caster is None else self.caster(val)
    super(tkdict, self).__setitem__(key, stored)

def __repr__( self ):
return self.__str__()

def items( self ):
return self.__iter__()

for kmer, val in self.__iter__():
try:
yield kmer, self.rcaster(val)
except:
yield kmer, val

def parallel_add_seq(self, seq, values):
    """Convert ``values`` as configured, then call the parent ``parallel_add_seq``.

    When ``self.seq_caster`` is truthy, ``values`` is wrapped by it. Each
    element is first converted with ``self.caster`` if one was supplied.
    Without a ``seq_caster`` the values are forwarded unchanged.

    :param seq: sequence argument forwarded untouched to the parent class.
    :param values: iterable of values accompanying ``seq``.
    """
    if self.seq_caster:
        # ``caster`` defaults to None; on Python 3 iterating
        # ``map(None, values)`` raises TypeError, so only map when it is set.
        elements = values if self.caster is None else map(self.caster, values)
        values = self.seq_caster(elements)
    super(tkdict, self).parallel_add_seq(seq, values)

def add_seq(self, seq, values):
    """Convert ``values`` as configured, then call the parent ``add_seq``.

    When ``self.seq_caster`` is truthy, ``values`` is wrapped by it. Each
    element is first converted with ``self.caster`` if one was supplied.
    Without a ``seq_caster`` the values are forwarded unchanged.

    :param seq: sequence argument forwarded untouched to the parent class.
    :param values: iterable of values accompanying ``seq``.
    """
    if self.seq_caster:
        # ``caster`` defaults to None; on Python 3 iterating
        # ``map(None, values)`` raises TypeError, so only map when it is set.
        elements = values if self.caster is None else map(self.caster, values)
        values = self.seq_caster(elements)
    super(tkdict, self).add_seq(seq, values)

def iteritems( self ):
return self.__iter__()
for kmer, val in self.__iter__():
try:
yield kmer, self.rcaster(val)
except:
yield kmer, val

def keys(self):
    """Generate every kmer produced by ``self.__iter__()``, dropping the values."""
    for entry in self.__iter__():
        kmer, _value = entry
        yield kmer

def values( self ):
for kmer, val in self.__iter__():
yield val
try:
yield self.rcaster(val)
except:
yield val

def copy( self ):
new_kdict = Kdict( self.k )
Expand All @@ -39,27 +83,42 @@ def copy( self ):

def get( self, key, value = None ):
if key in self:
return self[ key ]
val = self[key]
try:
return self.rcaster(val)
except:
return val
else:
return value

def popitem(self):
    """Remove and return the first ``(kmer, value)`` pair yielded by ``items()``.

    The entry is deleted from the container, then the value is passed through
    ``self.rcaster``. If that call fails for any reason (including ``rcaster``
    being ``None``), the unconverted value is returned instead.
    """
    first = next(self.items())
    kmer, item = first
    del self[kmer]
    try:
        converted = self.rcaster(item)
    except:  # deliberately broad: fall back to the raw value on any failure
        converted = item
    return (kmer, converted)

def setdefault( self, key, value = None ):
if key not in self:
self[ key ] = value
return value
else:
return self[ key ]
value = self[ key ]
try:
return self.rcaster(value)
except:
return value

def pop( self, key, *default ):
if key in self:
value = self[ key ]
del self[ key ]
return value
try:
return self.rcaster(value)
except:
return value
else:
if len( default ) > 0:
return default
Expand All @@ -74,7 +133,32 @@ def update( self, *others ):
except:
key = item
val = other[ key ]
self[ key ] = val
try:
self[ key ] = self.caster(val)
except:
self[key] = val

'''
def add_seq(self, seq, values):
if self.caster:
caster = self.caster.__name__
split = caster.find('_')
caster = eval(caster[:split] + '_vector' + caster[split:])
values = caster(values)
super(tkdict, self).add_seq(seq, values)
def parallel_add_seq(self, seq, values):
if self.caster:
print('casting')
caster = self.caster.__name__
split = caster.find('_')
caster = eval(caster[:split] + '_vector' + caster[split:])
print(caster)
values = caster(values)
print(type(values))
super(tkdict, self).parallel_add_seq(seq, values)
'''


return tkdict

Expand All @@ -83,9 +167,19 @@ def Kdict(val_type, k):
iter(val_type)
except:
type_name = 'Kdict_' + val_type.__name__
caster = None
rcaster = None
seq_caster = None
kd = create_kdict(eval(type_name))(k)
else:
type_name = 'Kdict_' + '_'.join([x.__name__ if x != list else 'vector' for x in val_type])
return create_kdict(eval(type_name))(k)
# NOTE: for now, we just store everything as a list
caster = 'o' + '_'.join([x.__name__ if x != list else 'vector' for x in val_type])
seq_caster = 'ovector_' + '_'.join([x.__name__ if x != list else 'vector' for x in val_type])
# NOTE: this does not allow for nested collections
rcaster = val_type[0]
kd = create_kdict(eval(type_name))(k, eval(caster), eval(seq_caster), rcaster)
return kd


class Kset(KsetParent):
Expand Down
Loading

0 comments on commit 2e3d941

Please sign in to comment.