-
Notifications
You must be signed in to change notification settings - Fork 121
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
5 changed files
with
387 additions
and
277 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
/* | ||
* Copyright 2018, Chanhee Park <[email protected]> and Daehwan Kim <[email protected]> | ||
* | ||
* This file is part of HISAT 2. | ||
* | ||
* HISAT 2 is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* HISAT 2 is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with HISAT 2. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "bit_packed_array.h" | ||
|
||
/**
 * Fetch the item stored at position 'index'.
 *
 * The index is first split into (block number, column inside the block),
 * the column is then turned into (word index, bit offset) within that
 * block, and the packed value is extracted from there.
 *
 * @param index position of the item; must be smaller than the current item count
 * @return the value stored at 'index'
 */
TIndexOffU BitPackedArray::get(size_t index) const
{
    assert_lt(index, cur_);

    const pair<size_t, size_t> where = indexToAddress(index);
    const pair<size_t, size_t> bitpos = columnToPosition(where.second);

    return getItem(blocks_[where.first], bitpos.first, bitpos.second);
}
|
||
|
||
#define write_fp(x) fp.write((const char *)&(x), sizeof((x))) | ||
|
||
void BitPackedArray::writeFile(ofstream &fp) | ||
{ | ||
size_t sz = 0; | ||
|
||
write_fp(item_bit_size_); | ||
write_fp(elm_bit_size_); | ||
write_fp(items_per_block_bit_); | ||
write_fp(items_per_block_bit_mask_); | ||
write_fp(items_per_block_); | ||
|
||
write_fp(cur_); | ||
write_fp(sz_); | ||
|
||
write_fp(block_size_); | ||
|
||
// number of blocks | ||
sz = blocks_.size(); | ||
write_fp(sz); | ||
for(size_t i = 0; i < sz; i++) { | ||
fp.write((const char *)blocks_[i], block_size_); | ||
} | ||
} | ||
|
||
#define read_fp(x) fp.read((char *)&(x), sizeof((x))) | ||
|
||
void BitPackedArray::readFile(ifstream &fp) | ||
{ | ||
TIndexOffU val = 0; | ||
size_t val_sz = 0; | ||
|
||
read_fp(val_sz); | ||
rt_assert_eq(val_sz, item_bit_size_); | ||
|
||
read_fp(val_sz); | ||
rt_assert_eq(val_sz, elm_bit_size_); | ||
|
||
read_fp(val_sz); | ||
rt_assert_eq(val_sz, items_per_block_bit_); | ||
|
||
read_fp(val_sz); | ||
rt_assert_eq(val_sz, items_per_block_bit_mask_); | ||
|
||
read_fp(val_sz); | ||
rt_assert_eq(val_sz, items_per_block_); | ||
|
||
// skip cur_ | ||
size_t prev_cnt = 0; | ||
read_fp(prev_cnt); | ||
cur_ = 0; | ||
|
||
// skip sz_ | ||
size_t prev_sz = 0; | ||
read_fp(prev_sz); | ||
sz_ = 0; | ||
|
||
// block_size_ | ||
read_fp(val_sz); | ||
rt_assert_eq(val_sz, block_size_); | ||
|
||
// alloc blocks | ||
allocItems(prev_cnt); | ||
rt_assert_eq(prev_sz, sz_); | ||
|
||
// number of blocks | ||
read_fp(val_sz); | ||
rt_assert_eq(val_sz, blocks_.size()); | ||
for(size_t i = 0; i < blocks_.size(); i++) { | ||
fp.read((char *)blocks_[i], block_size_); | ||
} | ||
cur_ = prev_cnt; | ||
} | ||
|
||
void BitPackedArray::put(size_t index, TIndexOffU val) | ||
{ | ||
assert_lt(index, cur_); | ||
|
||
pair<size_t, size_t> addr = indexToAddress(index); | ||
uint64_t *block = blocks_[addr.first]; | ||
pair<size_t, size_t> pos = columnToPosition(addr.second); | ||
|
||
setItem(block, pos.first, pos.second, val); | ||
} | ||
|
||
/**
 * Append 'val' after the last item, allocating room for another
 * items_per_block_ items when the array is full.
 *
 * @param val value to append
 */
void BitPackedArray::pushBack(TIndexOffU val)
{
    if(sz_ == cur_) {
        // no free slot left
        allocItems(items_per_block_);
    }

    // the count is bumped before the store so that put()'s bound check
    // accepts the new slot
    const size_t slot = cur_;
    ++cur_;
    put(slot, val);

    assert_leq(cur_, sz_);
}
|
||
/**
 * Extract one packed item from 'block'.
 *
 * The item starts at bit 'offset' of word block[idx] and is
 * item_bit_size_ bits long; when it does not fit in that word, the rest
 * is taken from the low bits of the following word(s). The pieces are
 * assembled least-significant part first.
 *
 * @param block  words of the block holding the item
 * @param idx    index of the word in which the item starts
 * @param offset bit offset of the item inside that word
 * @return the reassembled value
 */
TIndexOffU BitPackedArray::getItem(uint64_t *block, size_t idx, size_t offset) const
{
    TIndexOffU result = 0;

    // 'gathered' counts the bits already copied into 'result'
    for(size_t gathered = 0; gathered < item_bit_size_; ++idx, offset = 0) {
        const size_t left = item_bit_size_ - gathered;
        const size_t take = min(elm_bit_size_ - offset, left);

        // pull 'take' bits out of the current word ...
        const TIndexOffU piece = (block[idx] >> offset) & bitToMask(take);

        // ... and place them above the bits collected so far
        result |= piece << gathered;

        gathered += take;
    }

    return result;
}
|
||
void BitPackedArray::setItem(uint64_t *block, size_t idx, size_t offset, TIndexOffU val) | ||
{ | ||
size_t remains = item_bit_size_; | ||
|
||
while(remains > 0) { | ||
size_t bits = min(elm_bit_size_ - offset, remains); | ||
uint64_t mask = bitToMask(bits); | ||
uint64_t dest_mask = mask << offset; | ||
|
||
// get 'bits' lsb from val | ||
uint64_t t = val & mask; | ||
val >>= bits; | ||
|
||
// save 't' to block[idx] | ||
t <<= offset; | ||
block[idx] &= ~(dest_mask); // clear | ||
block[idx] |= t; | ||
|
||
idx++; | ||
remains -= bits; | ||
offset = 0; | ||
} | ||
} | ||
|
||
/**
 * Split an item index into its block number and its column within that block.
 *
 * @param index item index
 * @return pair (block number, column inside the block)
 */
pair<size_t, size_t> BitPackedArray::indexToAddress(size_t index) const
{
    const size_t block_no = index >> items_per_block_bit_;
    const size_t column = index & items_per_block_bit_mask_;

    return pair<size_t, size_t>(block_no, column);
}
|
||
/**
 * Translate a column (item number inside a block) into the location of
 * the item's first bit.
 *
 * @param col column inside the block
 * @return pair (index of the word, bit offset inside that word)
 */
pair<size_t, size_t> BitPackedArray::columnToPosition(size_t col) const {
    // absolute bit position of the item within the block
    const size_t first_bit = col * item_bit_size_;

    return pair<size_t, size_t>(first_bit / elm_bit_size_, first_bit % elm_bit_size_);
}
|
||
void BitPackedArray::expand(size_t count) | ||
{ | ||
if((cur_ + count) > sz_) { | ||
allocItems(count); | ||
} | ||
|
||
cur_ += count; | ||
|
||
assert_leq(cur_, sz_); | ||
} | ||
|
||
void BitPackedArray::allocSize(size_t sz) | ||
{ | ||
size_t num_block = (sz * sizeof(uint64_t) + block_size_ - 1) / block_size_; | ||
|
||
for(size_t i = 0; i < num_block; i++) { | ||
uint64_t *ptr = new uint64_t[block_size_]; | ||
blocks_.push_back(ptr); | ||
sz_ += items_per_block_; | ||
} | ||
} | ||
|
||
void BitPackedArray::allocItems(size_t count) | ||
{ | ||
size_t sz = (count * item_bit_size_ + elm_bit_size_ - 1) / elm_bit_size_; | ||
allocSize(sz); | ||
} | ||
|
||
void BitPackedArray::init(size_t max_value) | ||
{ | ||
item_bit_size_ = ceil(log2(max_value)); | ||
elm_bit_size_ = sizeof(uint64_t) * 8; | ||
|
||
items_per_block_bit_ = 20; // 1M | ||
items_per_block_ = 1ULL << (items_per_block_bit_); | ||
items_per_block_bit_mask_ = items_per_block_ - 1; | ||
|
||
block_size_ = (items_per_block_ * item_bit_size_ + elm_bit_size_ - 1) / elm_bit_size_ * sizeof(uint64_t); | ||
|
||
cur_ = 0; | ||
sz_ = 0; | ||
} | ||
|
||
void BitPackedArray::dump() const | ||
{ | ||
cerr << "item_bit_size_: " << item_bit_size_ << endl; | ||
cerr << "block_size_: " << block_size_ << endl; | ||
cerr << "items_per_block_: " << items_per_block_ << endl; | ||
cerr << "cur_: " << cur_ << endl; | ||
cerr << "sz_: " << sz_ << endl; | ||
cerr << "number of blocks: " << blocks_.size() << endl; | ||
} | ||
|
||
size_t BitPackedArray::getMemUsage() const | ||
{ | ||
size_t tot = blocks_.size() * block_size_; | ||
tot += blocks_.totalCapacityBytes(); | ||
return tot; | ||
} | ||
|
||
/**
 * Free every block referenced from blocks_.
 */
BitPackedArray::~BitPackedArray()
{
    size_t remaining = blocks_.size();
    while(remaining > 0) {
        --remaining;
        delete [] blocks_[remaining];
    }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Copyright 2018, Chanhee Park <[email protected]> and Daehwan Kim <[email protected]> | ||
* | ||
* This file is part of HISAT 2. | ||
* | ||
* HISAT 2 is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* HISAT 2 is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with HISAT 2. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#ifndef __HISAT2_BIT_PACKED_ARRAY_H | ||
#define __HISAT2_BIT_PACKED_ARRAY_H | ||
|
||
#include <iostream> | ||
#include <fstream> | ||
#include <limits> | ||
#include <map> | ||
#include "assert_helpers.h" | ||
#include "word_io.h" | ||
#include "mem_ids.h" | ||
#include "ds.h" | ||
|
||
using namespace std; | ||
|
||
class BitPackedArray { | ||
public: | ||
BitPackedArray () {} | ||
~BitPackedArray(); | ||
|
||
/** | ||
* Return true iff there are no items | ||
* @return | ||
*/ | ||
inline bool empty() const { return cur_ == 0; } | ||
inline size_t size() const { return cur_; } | ||
|
||
TIndexOffU get(size_t idx) const; | ||
|
||
inline TIndexOffU operator[](size_t i) const { return get(i); } | ||
void pushBack(TIndexOffU val); | ||
|
||
void init(size_t max_value); | ||
|
||
void writeFile(ofstream &fp); | ||
void readFile(ifstream &fp); | ||
|
||
void dump() const; | ||
|
||
size_t getMemUsage() const; | ||
|
||
private: | ||
void put(size_t index, TIndexOffU val); | ||
inline uint64_t bitToMask(size_t bit) const | ||
{ | ||
return (uint64_t) ((1ULL << bit) - 1); | ||
} | ||
|
||
TIndexOffU getItem(uint64_t *block, size_t idx, size_t offset) const; | ||
void setItem(uint64_t *block, size_t idx, size_t offset, TIndexOffU val); | ||
|
||
pair<size_t, size_t> indexToAddress(size_t index) const; | ||
pair<size_t, size_t> columnToPosition(size_t col) const; | ||
|
||
|
||
void expand(size_t count = 1); | ||
void allocSize(size_t sz); | ||
void allocItems(size_t count); | ||
|
||
|
||
private: | ||
size_t item_bit_size_; // item bit size(e.g. 33bit) | ||
|
||
size_t elm_bit_size_; // 64bit | ||
size_t items_per_block_bit_; | ||
size_t items_per_block_bit_mask_; | ||
size_t items_per_block_; // number of items in block | ||
|
||
size_t cur_; // current item count | ||
size_t sz_; // maximum item count | ||
|
||
size_t block_size_; // block size in byte | ||
|
||
// List of packed array | ||
EList<uint64_t *> blocks_; | ||
}; | ||
|
||
|
||
#endif //__HISAT2_BIT_PACKED_ARRAY_H |
Oops, something went wrong.