Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CQF is replaced by MQF #1859

Closed
wants to merge 17 commits into from
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ matrix:
osx_image: xcode7.3
env:
- TESTATTR="'not linux and not known_failing and not huge'"
- CC=gcc
- CXX=g++
- c++=g++
before_install:
- source ci_scripts/install.sh

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ clean: FORCE
cd src/oxli && $(MAKE) clean || true
cd tests && rm -rf khmertest_* || true
rm -f pytests.xml
cd third-party/cqf && make clean || true
cd third-party/mqf && make clean || true
rm -f $(EXTENSION_MODULE)
rm -f khmer/*.pyc scripts/*.pyc tests/*.pyc oxli/*.pyc \
sandbox/*.pyc khmer/__pycache__/* sandbox/__pycache__/* \
Expand Down
6 changes: 5 additions & 1 deletion examples/c++-api/Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
CXXFLAGS=--std=c++11 \
-I ../../include/ \
-I ../../third-party/smhasher \
-I ../../third-party/cqf \
-I ../../third-party/mqf \
-I ../../third-party/seqan/core/include/ \
-I ../../third-party/rollinghash

TESTS=exact-counting bloom consume

ifneq ($(INCLUDE),)
INCLUDE := -I$(INCLUDE)
endif

%: %.cc ../../src/oxli/liboxli.a
$(CXX) $(CXXFLAGS) $< ../../src/oxli/liboxli.a -o $@

Expand Down
111 changes: 69 additions & 42 deletions include/oxli/storage.hh
Original file line number Diff line number Diff line change
Expand Up @@ -410,51 +410,78 @@ public:
*
* \brief A Quotient Filter storage
*/
class QFStorage : public Storage {
class QFStorage : public Storage
{
protected:
QF cf;
QF mf;

public:
QFStorage(int size) {
// size is the power of two to specify the number of slots in
// the filter (2**size). Third argument sets the number of bits used
// in the key (current value of size+8 is copied from the CQF example)
// Final argument is the number of bits allocated for the value, which
// we do not use.
qf_init(&cf, (1ULL << size), size+8, 0);
}

~QFStorage() { qf_destroy(&cf); }

BoundedCounterType test_and_set_bits(HashIntoType khash) {
BoundedCounterType x = get_count(khash);
add(khash);
return !x;
}

//
bool add(HashIntoType khash) {
bool is_new = get_count(khash) == 0;
qf_insert(&cf, khash % cf.range, 0, 1);
return is_new;
}

// get the count for the given k-mer hash.
const BoundedCounterType get_count(HashIntoType khash) const {
return qf_count_key_value(&cf, khash % cf.range, 0);
}

// Accessors for protected/private table info members
// xnslots is larger than nslots. It includes some extra slots to deal
// with some details of how the counting is implemented
std::vector<uint64_t> get_tablesizes() const { return {cf.xnslots}; }
const size_t n_tables() const { return 1; }
const uint64_t n_unique_kmers() const { return cf.ndistinct_elts; }
const uint64_t n_occupied() const { return cf.noccupied_slots; }
void save(std::string outfilename, WordLength ksize);
void load(std::string infilename, WordLength &ksize);

Byte **get_raw_tables() { return nullptr; }
/**
 * Build a quotient filter with 2**size slots.
 *
 * \param size power of two giving the number of slots in the filter.
 */
QFStorage(int size)
{
    _supports_bigcount = true;

    // Number of slots in the filter (2**size).
    const auto n_slots = (1ULL << size);
    // Number of bits used in the key; size+8 is copied from the CQF example.
    const auto key_bits = size + 8;

    // NOTE(review): the roles of the trailing arguments are inferred from
    // the values passed here, not from a declaration visible in this file --
    // confirm against qf_init in third-party/mqf/gqf.h.
    qf_init(&mf, n_slots, key_bits,
            0,            // bits allocated for the value, which we do not use
            2,            // presumably the fixed counter size -- TODO confirm
            true,         // presumably "keep the filter in memory" -- TODO confirm
            "",           // presumably the backing file path -- TODO confirm
            2038074761);  // presumably the hash seed -- TODO confirm
}

// Tear the filter down by handing it to qf_destroy.
~QFStorage() { qf_destroy(&mf); }

/**
 * Record one occurrence of a k-mer hash and report whether it was new.
 *
 * \param khash hash value of the k-mer.
 * \return 1 if get_count(khash) was zero before this call, 0 otherwise.
 */
BoundedCounterType test_and_set_bits(HashIntoType khash)
{
    // add() already checks whether the previous count was zero and returns
    // that answer, so delegating to it avoids a second lookup of the same
    // key.
    return add(khash);
}

/**
 * Insert one occurrence of a k-mer hash into the filter.
 *
 * \param khash hash value of the k-mer; it is reduced modulo the filter's
 *              range before insertion.
 * \return true when the count for khash was zero before the insertion.
 */
bool add(HashIntoType khash)
{
    const bool first_sighting = (get_count(khash) == 0);
    const auto key = khash % mf.metadata->range;
    qf_insert(&mf, key, 1, false, false);
    return first_sighting;
}

/**
 * Get the count for the given k-mer hash.
 *
 * \param khash hash value of the k-mer; it is reduced modulo the filter's
 *              range before the lookup.
 * \return the value reported by qf_count_key for that key.
 */
const BoundedCounterType get_count(HashIntoType khash) const
{
    const auto key = khash % mf.metadata->range;
    return qf_count_key(&mf, key);
}

// Accessors for protected/private table info members
// xnslots is larger than nslots. It includes some extra slots to deal
// with some details of how the counting is implemented
// Table sizes: a one-element vector holding the filter's xnslots.
std::vector<uint64_t> get_tablesizes() const
{
    std::vector<uint64_t> sizes;
    sizes.push_back(mf.metadata->xnslots);
    return sizes;
}
// Number of tables: this storage always reports exactly one.
const size_t n_tables() const
{
    const size_t single_table = 1;
    return single_table;
}
// Number of distinct elements, read from the filter metadata.
const uint64_t n_unique_kmers() const
{
    const uint64_t distinct = mf.metadata->ndistinct_elts;
    return distinct;
}
// Number of occupied slots, read from the filter metadata.
const uint64_t n_occupied() const
{
    const uint64_t occupied = mf.metadata->noccupied_slots;
    return occupied;
}
void save(std::string outfilename, WordLength ksize);
void load(std::string infilename, WordLength &ksize);

// This storage does not expose raw byte tables; always returns nullptr.
Byte **get_raw_tables()
{
    Byte **no_tables = nullptr;
    return no_tables;
}
};


Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ undef = NO_UNIQUE_RC
# docker/Dockerfile
# libraries = z,bz2
## if using system libraries
include-dirs = include:third-party/zlib:third-party/bzip2:third-party/seqan/core/include:third-party/smhasher:third-party/cqf:third-party/rollinghash
include-dirs = include:third-party/zlib:third-party/bzip2:third-party/seqan/core/include:third-party/smhasher:third-party/rollinghash:third-party/mqf
# include-dirs = lib
## if using system libraries (broken)

Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,10 @@ def run(self):
if sys.platform == 'darwin' and 'gcov' in self.libraries:
self.libraries.remove('gcov')

cqfcmd = ['bash', '-c', 'cd third-party/cqf && make']
spawn(cmd=cqfcmd, dry_run=self.dry_run)
mqfcmd = ['bash', '-c', 'cd third-party/mqf && make']
spawn(cmd=mqfcmd, dry_run=self.dry_run)
for ext in self.extensions:
ext.extra_objects.append(path_join("third-party", "cqf", "gqf.o"))
ext.extra_objects.append(path_join("third-party", "mqf", "gqf.o"))

if "z" not in self.libraries:
zcmd = ['bash', '-c', 'cd ' + ZLIBDIR + ' && ( test Makefile -nt'
Expand Down
26 changes: 14 additions & 12 deletions src/oxli/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ PREFIX=/usr/local

INCLUDES= -I ../../include/ -I ../../third-party/seqan/core/include/ \
-I ../../third-party/smhasher/ \
-I ../../third-party/cqf/ \
-I ../../third-party/rollinghash
-I ../../third-party/mqf/ \
-I ../../third-party/rollinghash \


ifeq ($(USE_SYSTEM_ZLIB), false)
INCLUDES += -I ../../third-party/zlib/
Expand Down Expand Up @@ -102,6 +103,7 @@ CFLAGS += -Wshadow -Wcast-align -Wstrict-prototypes
CFLAGS += $(INCLUDES) $(CPPFLAGS)

LDFLAGS ?=
LDFLAGS += -lstdc++
ifneq ($(USE_SYSTEM_ZLIB), false)
LDFLAGS += -lz
endif
Expand Down Expand Up @@ -221,10 +223,10 @@ BZIP2_OBJS_BASE= \
BZIP2_OBJS=$(addprefix $(BZIP2_DIR)/, $(BZIP2_OBJS_BASE))

# Counting quotient filter
CQF_DIR=../../third-party/cqf
CQF_OBJS_BASE= gqf.o
MQF_DIR=../../third-party/mqf
MQF_OBJS_BASE= gqf.o

CQF_OBJS=$(addprefix $(CQF_DIR)/, $(CQF_OBJS_BASE))
MQF_OBJS=$(addprefix $(MQF_DIR)/, $(MQF_OBJS_BASE))

#### oxli proper below here ####

Expand Down Expand Up @@ -259,9 +261,9 @@ PRECOMILE_OBJS += $(BZIP2_OBJS)
PRECLEAN_TARGS += libbz2clean
endif

LIBOXLI_OBJS += $(CQF_OBJS)
PRECOMILE_OBJS += $(CQF_OBJS)
PRECLEAN_TARGS += libcqfclean
LIBOXLI_OBJS += $(MQF_OBJS)
PRECOMILE_OBJS += $(MQF_OBJS)
PRECLEAN_TARGS += libmqfclean

HEADERS= \
hashtable.hh \
Expand Down Expand Up @@ -290,8 +292,8 @@ zlibclean:
(cd $(ZLIB_DIR) && make distclean)
libbz2clean:
(cd $(BZIP2_DIR) && make -f Makefile-libbz2_so clean)
libcqfclean:
(cd $(CQF_DIR) && make clean)
libmqfclean:
(cd $(MQF_DIR) && make clean)

clean: $(PRECLEAN_TARGS)
rm -f *.o *.a *.$(SHARED_EXT)* oxli.pc $(TEST_PROGS)
Expand All @@ -315,8 +317,8 @@ $(ZLIB_OBJS):
$(BZIP2_OBJS):
(cd $(BZIP2_DIR) && make -f Makefile-libbz2_so $(BZIP2_OBJS_BASE))

$(CQF_OBJS):
(cd $(CQF_DIR) && make)
$(MQF_OBJS):
(cd $(MQF_DIR) && make)

# MurMur3
murmur3.o: ../../third-party/smhasher/MurmurHash3.cc
Expand Down
72 changes: 20 additions & 52 deletions src/oxli/storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -923,34 +923,14 @@ void QFStorage::save(std::string outfilename, WordLength ksize)
unsigned char version = SAVED_FORMAT_VERSION;
unsigned char ht_type = SAVED_QFCOUNT;


outfile.write(SAVED_SIGNATURE, 4);
outfile.write((const char *) &version, 1);
outfile.write((const char *) &ht_type, 1);
outfile.write((const char *) &ksize, sizeof(ksize));

/* just a hack to handle the __uint128_t value. I don't know a better way to
 * handle it right now */
uint64_t tmp_range;
tmp_range = cf.range;

outfile.write((const char *) &cf.nslots, sizeof(cf.nslots));
outfile.write((const char *) &cf.xnslots, sizeof(cf.xnslots));
outfile.write((const char *) &cf.key_bits, sizeof(cf.key_bits));
outfile.write((const char *) &cf.value_bits, sizeof(cf.value_bits));
outfile.write((const char *) &cf.key_remainder_bits, sizeof(cf.key_remainder_bits));
outfile.write((const char *) &cf.bits_per_slot, sizeof(cf.bits_per_slot));
outfile.write((const char *) &tmp_range, sizeof(tmp_range));
outfile.write((const char *) &cf.nblocks, sizeof(cf.nblocks));
outfile.write((const char *) &cf.nelts, sizeof(cf.nelts));
outfile.write((const char *) &cf.ndistinct_elts, sizeof(cf.ndistinct_elts));
outfile.write((const char *) &cf.noccupied_slots, sizeof(cf.noccupied_slots));

#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 || BITS_PER_SLOT == 64
outfile.write((const char *) cf.blocks, sizeof(qfblock) * cf.nblocks);
#else
outfile.write((const char *) cf.blocks,
(sizeof(qfblock) + SLOTS_PER_BLOCK * cf.bits_per_slot / 8) * cf.nblocks);
#endif
outfile.write((const char *)mf.metadata,sizeof(qfmetadata));
outfile.write((const char *)mf.blocks,mf.metadata->size);
outfile.close();
}

Expand Down Expand Up @@ -1011,34 +991,22 @@ void QFStorage::load(std::string infilename, WordLength &ksize)
infile.read((char *) &save_ksize, sizeof(save_ksize));
ksize = save_ksize;

infile.read((char *) &cf.nslots, sizeof(cf.nslots));
infile.read((char *) &cf.xnslots, sizeof(cf.xnslots));
infile.read((char *) &cf.key_bits, sizeof(cf.key_bits));
infile.read((char *) &cf.value_bits, sizeof(cf.value_bits));
infile.read((char *) &cf.key_remainder_bits, sizeof(cf.key_remainder_bits));
infile.read((char *) &cf.bits_per_slot, sizeof(cf.bits_per_slot));
infile.read((char *) &tmp_range, sizeof(tmp_range));

infile.read((char *) &cf.nblocks, sizeof(cf.nblocks));
infile.read((char *) &cf.nelts, sizeof(cf.nelts));
infile.read((char *) &cf.ndistinct_elts, sizeof(cf.ndistinct_elts));
infile.read((char *) &cf.noccupied_slots, sizeof(cf.noccupied_slots));
/* just a hack to handle the __uint128_t value. I don't know a better way to
 * handle it right now */
cf.range = tmp_range;
// deallocate previously allocated blocks
free(cf.blocks);
/* allocate the space for the actual qf blocks */
#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 || BITS_PER_SLOT == 64
cf.blocks = (qfblock *)calloc(cf.nblocks, sizeof(qfblock));
#else
cf.blocks = (qfblock *)calloc(cf.nblocks, sizeof(qfblock) + SLOTS_PER_BLOCK * cf.bits_per_slot / 8);
#endif
#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 || BITS_PER_SLOT == 64
infile.read((char *) cf.blocks, sizeof(qfblock) * cf.nblocks);
#else
infile.read((char *) cf.blocks,
(sizeof(qfblock) + SLOTS_PER_BLOCK * cf.bits_per_slot / 8) * cf.nblocks);
#endif
mf.mem = (qfmem *)calloc(sizeof(qfmem), 1);
mf.metadata = (qfmetadata *)calloc(sizeof(qfmetadata), 1);
infile.read((char*)mf.metadata,sizeof(qfmetadata));
mf.blocks = (qfblock *)calloc(mf.metadata->size, 1);
infile.read((char*)mf.blocks, mf.metadata->size);

mf.metadata->num_locks =
10;//should be changed to something realistic like function qf_deserialize
mf.mem->metadata_lock = 0;
/* initialize all the locks to 0 */
mf.mem->locks = (volatile int *)calloc(mf.metadata->num_locks,
sizeof(volatile int));





infile.close();
}
Loading