Skip to content

Commit

Permalink
For all 4 adix/oats test programs:
Browse files Browse the repository at this point in the history
  - Change behavior on token overflow to truncate rather than skip, and always
    print the over-long token's length plus a truncated copy of the token.

  - Make programs more lexically similar so that side-by-side diffs such as
        `diff tests/ucl.nim util/lfreq.nim` to add counts
        `diff tests/ucl.nim tests/wu.nim` to add words
        `diff tests/wu.nim tests/wfr.nim` to add both words & counts
    are all less noisy.  (Probably still not perfect, but closer.)
  • Loading branch information
c-blake committed Sep 27, 2024
1 parent 806e21f commit 5805dc7
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 26 deletions.
28 changes: 13 additions & 15 deletions tests/ucl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,40 @@ type
dat: seq[Count]
nUsed: int

var a = " "; oatKStack a, Counts, Count, off,uint32, MSlice, MSlice
#proc key(c: var Counts, i: int, q: MSlice) = c.dat[i]=c.keyR(q) wrong&unneeded
var s = " "; oatKStack s, Counts, Count, off,uint32, MSlice, MSlice
proc key(c: Counts, i: int): MSlice =
  ## Key stored in slot `i` of the table's `dat` sequence.
  result = c.dat[i].key

proc used(c: Counts, i: int): bool =
  ## Whether slot `i` is occupied: a slot counts as used exactly when its
  ## `off` field is non-zero.
  # NOTE(review): presumably offset 0 is reserved as the "empty" marker,
  # which would explain `ucl` leaving the data stack at length 1 - confirm.
  result = c.dat[i].off != 0

# Hash-caching overloads. NOTE(review): indentation was lost in this view;
# presumably all three procs below sit inside the `when` body - confirm.
when defined hashCache: # def auto-triggers use
# Hash an `MSlice` key with `mslice.hash`, passing the value through `uint32`.
proc hash(ms: MSlice): Hash = mslice.hash(ms).uint32.Hash
# Save hash code `hc`, converted to `uint32`, in the `hc` field of slot `i`.
proc hash(c: var Counts, i: int, hc: Hash) {.used.} = c.dat[i].hc = hc.uint32
# Return the hash code previously saved in the `hc` field of slot `i`.
proc hash(c: Counts, i: int): Hash = c.dat[i].hc.Hash

oatCounted c,Counts, c.nUsed; oatSeq Counts, dat # make counted & resizable
when Counts is ROat[MSlice, MSlice]: {.warning: "Counts is a ROat"}

proc incFailed(h: var Counts, r: MSlice): bool =
if r.len + 1 > 1 shl bLen: # Careful to not overflow
erru "skipping too long(", $r.len, ") line: ",$r,"\n"
return # Cannot go on LOCALLY
h.upSert(r, i): discard # Found key @i: nothing to do
proc incFailed(h: var Counts, ms: MSlice): bool =
var ms = ms
if ms.len > (1 shl bLen) - 1: # Careful to not overflow XXX rate limit msgs
erru "truncating too long (", $ms.len, ") line: ", ($ms)[0..<256], "...\n"
ms.len = (1 shl bLen) - 1 # Truncation makes count potentially off
h.upSert(ms, i): discard # Found key @i: nothing to do
do: # Novel key->i:
h.dat[i].off = a.add(r, (1 shl bOff) - 1):
erru "unique word data overflow at:",$r,"\n" #XXX rate limit msgs
h.dat[i].off = s.add(ms, (1 shl bOff) - 1):
erru "unique word data overflow at:",$ms,"\n" #XXX rate limit msgs
return true # Cannot go on GLOBALLY
h.dat[i].len = r.len.uint32 # Init
h.dat[i].len = ms.len.uint32 # Init

proc ucl(size=9999, dSize=81920, tm=false) =
## Count unique & total lines on `stdin`. <256B long; <16 MiB unique data.
let t0 = if tm: epochTime() else: 0.0
var h: Counts; h.setCap size # Pre-size table & data
a.setLen dSize; a.setLen 1
var h: Counts; h.setCap size # pre-size table & data
s.setLen dSize; s.setLen 1
var nTot = 0
block IO:
for (line, nLine) in stdin.getDelims:
let ms = MSlice(mem: line, len: nLine - 1)
inc nTot # Always bump `nTotal`
if h.incFailed(ms): break IO
echo h.len," unique ",nTot," total ",a.len," B"
echo h.len," unique ",nTot," total ",s.len," B"
if tm: stderr.write epochTime() - t0, "\n"

when isMainModule: dispatch ucl, help={
Expand Down
7 changes: 4 additions & 3 deletions tests/wfr.nim
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ oatCounted c,Counts, c.nUsed; oatSeq Counts, dat # make counted & resizable
when Counts is VROat[MSlice, MSlice, uint32]: {.warning: "Counts is a VROat"}

proc incFailed(h: var Counts, ms: MSlice): bool =
if ms.len > (1 shl bLen) - 1: # Careful to not overflow
erru "skipping too long word: ",$ms,"\n"
return # Cannot go on LOCALLY
var ms = ms
if ms.len > (1 shl bLen) - 1: # Careful to not overflow XXX rate limit msgs
erru "truncating too long (", $ms.len, ") word: ", ($ms)[0..<32], "...\n"
ms.len = (1 shl bLen) - 1 # Truncation makes count potentially off
h.upSert(ms, i): # Found key @i:
if h.dat[i].cnt == (1 shl bCnt) - 1:
erru "counter overflow for: ",$ms,"\n" # no update XXX rate limit
Expand Down
7 changes: 4 additions & 3 deletions tests/wu.nim
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ oatCounted c,Counts, c.nUsed; oatSeq Counts, dat # make counted & resizable
when Counts is ROat[MSlice, MSlice]: {.warning: "Counts is a ROat"}

proc incFailed(h: var Counts, ms: MSlice): bool =
if ms.len > (1 shl bLen) - 1: # Careful to not overflow
erru "skipping too long word: ",$ms,"\n"
return # Cannot go on LOCALLY
var ms = ms
if ms.len > (1 shl bLen) - 1: # Careful to not overflow XXX rate limit msgs
erru "truncating too long (", $ms.len, ") word: ", ($ms)[0..<32], "...\n"
ms.len = (1 shl bLen) - 1
h.upSert(ms, i): discard # Found key @i:
do: # Novel key->i:
h.dat[i].off = s.add(ms, (1 shl bOff) - 1):
Expand Down
10 changes: 5 additions & 5 deletions util/lfreq.nim
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ oatCounted c,Counts, c.nUsed; oatSeq Counts, dat # make counted & resizable
when Counts is VROat[MSlice, MSlice, uint32]: {.warning: "Counts is a VROat"}

proc incFailed(h: var Counts, ms: MSlice): bool =
if ms.len > (1 shl bLen) - 1: # Careful to not overflow
erru "skipping too long line: ", ($ms)[0..<128], "...\n"
return false # Cannot go on LOCALLY
var ms = ms # Mutable local copy so the slice length can be clamped below
if ms.len > (1 shl bLen) - 1: # Careful to not overflow XXX rate limit msgs
  # Report the original length and a 128-byte prefix of the over-long line.
  erru "truncating too long (", $ms.len, ") line: ", ($ms)[0..<128], "...\n"
  # Actually perform the truncation the message announces, as the other three
  # programs do; without this the oversized length reaches `s.add` and the
  # `h.dat[i].len` store further down.
  ms.len = (1 shl bLen) - 1 # Truncation makes count potentially off
h.upSert(ms, i): # Found key @i:
if h.dat[i].cnt == (1 shl bCnt) - 1:
erru "counter overflow for: ",$ms,"\n" # no update XXX rate limit
erru "counter overflow for: ",$ms,"\n" # no update XXX rate limit msgs
else: h.dat[i].cnt.inc # bump
do: # Novel key->i:
h.dat[i].off = s.add(ms, (1 shl bOff) - 1):
erru "unique line data overflow at:",$ms,"\n" #XXX rate limit
erru "unique line data overflow at:",$ms,"\n" #XXX rate limit msgs
return true # Cannot go on GLOBALLY
h.dat[i].len = ms.len.uint32# Init
h.dat[i].cnt = 1u32
Expand Down

0 comments on commit 5805dc7

Please sign in to comment.