Pipelined varint compression of a vector of 16 elements. #639

ealione · 2023-11-01T23:01:09Z

ealione
Nov 1, 2023

I have a vector of 16 elements that I'd like to compress using variable integer encoding. I am not entirely sure what a good pipelined design would be that would allow me to compress one vector per cycle.

I have tested a software-like algorithm that obviously is a bad fit for hardware:

function Tuple2 #(UInt #(5), UInt #(5)) getNumLen(UInt #(4) i);
    case (i)
        0: return tuple2(2, 1);
        1: return tuple2(1, 1);
        2: return tuple2(10, 28);
        3: return tuple2(1, 2);
        4: return tuple2(9, 28);
        5: return tuple2(7, 22);
        6: return tuple2(3, 9);
        7: return tuple2(1, 3);
        8: return tuple2(8, 29);
        9: return tuple2(5, 18);
        10: return tuple2(3, 11);
        11: return tuple2(5, 19);
        12: return tuple2(4, 16);
        13: return tuple2(3, 12);
        14: return tuple2(2, 8);
        15: return tuple2(1, 4);
    endcase
endfunction

rule encode;
    let d = pack(streamInQ.first);
    streamInQ.deq;
    Bit #(4) bits = 0;
    Bit #(64) buff = 0;
    UInt #(4) i = 1;
    UInt #(4) j = 0;
    while (i <= 15) begin
        let numlen = getNumLen(i);
        UInt #(5) num = tpl_1(numlen);
        UInt #(5) len = tpl_2(numlen);
        UInt #(48) k = 1; // maximum values in a group are 16, thus k in [1, 16!]
        Bit #(64) code = 0;
        while (num > 0) begin
            code = code + zeroExtend(pack(k)) * zeroExtend(d[i]);
            k = k * zeroExtend(i);
            num = num - 1;
            i = i + 1;
        end
        if (bits != 0) begin
            comp[j] <= truncate(buff | (code << bits));
            code = code >> 8 - bits;
            len = len - 8 - zeroExtend(unpack(bits));
            j = j + 1;
        end
        while (len > 7) begin
            comp[j] <= truncate(code);
            code = code >> 8;
            len = len - 8;
            j = j + 1;
        end
        buff = code;
        bits = truncate(pack(len));
    end
endrule: encode

Or another design that I thought of looks like so:

module mkStreamVarintEncoder (VarintCompressionIfc #(16, inType)) provisos(Bits #(inType, a__), Add #(b__, a__, 512));
	FIFO #(Vector #(16, inType))            streamInQ <- mkFIFO;
	FIFO #(Tuple2 #(Bit #(512), Bit #(10))) streamOutQ <- mkFIFO;
        ...
       rule encode_stage1 (stage == 0 && !is_last);
        let d = streamInQ.first; streamInQ.deq;
        Vector #(16, Bit #(2)) prefix;
        Bit #(10) b = 0;
        is_last <= lastQ.first;

        for (Integer i = 0; i < 16; i = i + 1) begin
            prefix[i] = count_bytes(d[i]);
            Bit #(10) l = zeroExtend(prefix[i]) + 1;
            pipe_shift_left[i].rotate(zeroExtend(pack(d[i])), truncate(b));
	    b = b + l;
        end
        ...
    endrule : encode_stage1

    rule encode_stage2 (stage == 0);
        Vector #(16, Bit #(512)) w = replicate(0);
        Bit #(512) d = 0;
        for (Integer i = 0; i < 16; i = i + 1) begin
            w[i] <- pipe_shift_left[i].get;
            d = d | w[i];
        end
        ...
    endrule : encode_stage2

     ...

    method Action put(Vector #(16, inType) data, Bool last);
        streamInQ.enq(data);
        lastQ.enq(last);
    endmethod : put

    method ActionValue #(Tuple2 #(Bit #(512), Bit #(10))) get;
        streamOutQ.deq;
        return streamOutQ.first;
    endmethod : get
endmodule : mkStreamVarintEncoder

I still want to check with the community for a good way of designing such a circuit. If someone finds the time I'd appreciate his thoughts on this, at a high level of course.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Pipelined varint compression of a vector of 16 elements. #639

{{title}}

Replies: 0 comments

Select a reply

Pipelined varint compression of a vector of 16 elements. #639

ealione Nov 1, 2023

Replies: 0 comments

ealione
Nov 1, 2023