Skip to content

Commit

Permalink
FEAT: not using hashing for small enough maps (reduces memory usage)
Browse files Browse the repository at this point in the history
related to: 7091c94
  • Loading branch information
Oldes committed Feb 4, 2024
1 parent dad1e5f commit da4718b
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 25 deletions.
7 changes: 2 additions & 5 deletions src/core/n-sets.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,8 @@ enum {
if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);
retser = BUF_EMIT; // use preallocated shared block
Resize_Series(retser, i);
hret = Make_Hash_Array(i); // allocated

// Optimization note: !!
// This code could be optimized for small blocks by not hashing them
// and extending Find_Key to do a FIND on the value itself w/o the hash.
// don't hash small blocks...
hret = (i <= MIN_DICT) ? NULL : Make_Hash_Array(i); // allocated

do {
// Check what is in first series/map but not in second series/map:
Expand Down
49 changes: 49 additions & 0 deletions src/core/t-block.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,55 @@ static void No_Nones_Or_Logic(REBVAL *arg) {
}
}

/***********************************************************************
**
*/ REBCNT Find_Block_Key(REBSER* series, REBVAL* key, REBCNT skip, REBOOL cased)
/*
**		Try to find the key value in the block using a plain linear
**		scan (no hash table is consulted).
**
**		series - block to scan; values at index 0, skip, 2*skip, ...
**		         below series->tail are the candidates
**		key    - value to look for
**		skip   - distance between two candidates (a zero is treated
**		         as 1, otherwise the scan would never advance)
**		cased  - for word keys: when FALSE, canon symbols are compared
**		         as well; for other keys it is handed to the compare
**		         helpers (presumably case-sensitivity - confirm there)
**
**		RETURNS: the index to the KEY or NOT_FOUND if there is none.
**
***********************************************************************/
{
	REBVAL *val = BLK_HEAD(series);
	REBCNT n;

	// Guard against a zero step: the loops below would spin forever
	// whenever the first value does not match.
	if (skip == 0) skip = 1;

	if (ANY_WORD(key)) {
		// Any word type can match any other word type; only the symbol
		// (or its canon form when not cased) has to agree.
		for (n = 0; n < series->tail; n += skip, val += skip) {
			if (!ANY_WORD(val)) continue;
			if (VAL_WORD_SYM(key) == VAL_BIND_SYM(val)) return n;
			if (!cased && VAL_WORD_CANON(key) == VAL_BIND_CANON(val)) return n;
		}
	}
	else if (ANY_BINSTR(key)) {
		// Datatypes must be identical. The third argument is set only
		// for non-binary keys of a not-cased search.
		REBOOL uncase = (REBOOL)(!IS_BINARY(key) && !cased);

		for (n = 0; n < series->tail; n += skip, val += skip) {
			if (VAL_TYPE(val) != VAL_TYPE(key)) continue;
			if (0 == Compare_String_Vals(key, val, uncase)) return n;
		}
	}
	else {
		// Everything else: same datatype and Cmp_Value reporting zero.
		for (n = 0; n < series->tail; n += skip, val += skip) {
			if (VAL_TYPE(val) != VAL_TYPE(key)) continue;
			if (0 == Cmp_Value(key, val, cased)) return n;
		}
	}

	return NOT_FOUND;
}


/***********************************************************************
**
Expand Down
79 changes: 59 additions & 20 deletions src/core/t-map.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@
REBSER *blk = Make_Block(size*2);
REBSER *ser = 0;

ser = Make_Hash_Array(size);
// Use hashing only when there are more than MIN_DICT keys.
if (size > MIN_DICT) ser = Make_Hash_Array(size);

blk->series = ser;

Expand Down Expand Up @@ -124,6 +125,19 @@
REBCNT n;
REBVAL *val;

if (!hser) {
// If there are no hashes for the keys, use plain linear search...
hash = Find_Block_Key(series, key, wide, cased);
if (hash == NOT_FOUND) {
if (mode > 1) {
// Append the new value to the target series:
Append_Series(series, (REBYTE*)key, wide);
}
return -1;
}
return hash;
}

// Compute hash for value:
len = hser->tail;
hash = Hash_Value(key, len);
Expand Down Expand Up @@ -214,18 +228,43 @@
** and val is SET, create the entry and store the key and
** val.
**
** RETURNS: the index to the VALUE or zero if there is none.
** RETURNS: the index to the VALUE or NOT_FOUND if there is none.
**
***********************************************************************/
{
REBSER *hser = series->series; // can be null
REBCNT *hashes;
REBCNT *hashes = NULL;
REBCNT hash;
REBCNT n;
REBVAL *set;

if (IS_NONE(key) || hser == NULL) return 0;
if (IS_NONE(key)) return NOT_FOUND;

// We may not be large enough yet for the hash table to
// be worthwhile, so just do a linear search:
if (!hser) {
if (series->tail <= MIN_DICT * 2) {
hash = Find_Block_Key(series, key, 2, cased);
if (hash != NOT_FOUND) {
hash++; // position of the value
// Key already exists so update the value, if needed...
if (val) {
set = BLK_SKIP(series, hash);
*set = *val;
}
// Return
return hash;
}
if (!val) return NOT_FOUND;
hash /= 2;
goto new_entry;
}

// Add hash table:
//Print("hash added %d", series->tail);
series->series = hser = Make_Hash_Array(series->tail);
Rehash_Hash(series);
}
// Get hash table, expand it if needed:
if (series->tail > hser->tail/2) {
Expand_Hash(hser); // modifies size value
Expand All @@ -237,16 +276,17 @@
n = hashes[hash];

// Just a GET of value:
if (!val) return n;
if (!val) return ((n-1)*2)+1;

// Must set the value:
if (n) { // re-set it:
set = BLK_SKIP(series, ((n-1)*2)); // find the key
VAL_CLR_OPT(set++, OPTS_HIDE); // clear HIDE flag in case it was removed key; change to value position
*set = *val; // set the value
return n;
n = (n-1)*2; // index of the key
set = BLK_SKIP(series, n); // find the key
VAL_CLR_OPT(set++, OPTS_HIDE); // clear HIDE flag in case it was removed key; change to value position
*set = *val; // set the value
return n+1; // index of the value
}

new_entry:
// Create new entry:
#ifndef DO_NOT_NORMALIZE_MAP_KEYS
// append key
Expand Down Expand Up @@ -275,8 +315,8 @@
#endif
// append value
Append_Val(series, val); // no Copy_Series_Value(val) on strings

return (hashes[hash] = series->tail/2);
if (hashes) hashes[hash] = series->tail / 2; // Hash index is not a real index position of the value!
return series->tail; // Index of the new value.
}


Expand Down Expand Up @@ -320,9 +360,9 @@

n = Find_Entry(VAL_SERIES(data), pvs->select, val, FALSE);

if (!n) return PE_NONE;
if (n == NOT_FOUND) return PE_NONE;

pvs->value = VAL_BLK_SKIP(data, ((n-1)*2)+1);
pvs->value = VAL_BLK_SKIP(data, n);
return PE_OK;
}

Expand Down Expand Up @@ -507,8 +547,8 @@
case A_SELECT:
case A_FIND:
n = Find_Entry(series, arg, 0, Find_Refines(ds, AM_SELECT_CASE) ? AM_FIND_CASE : 0);
if (!n) return R_NONE;
*D_RET = *VAL_BLK_SKIP(val, ((n-1)*2)+((action == A_FIND)?0:1));
if (n == NOT_FOUND) return R_NONE;
*D_RET = *VAL_BLK_SKIP(val, n - ((action == A_FIND)?1:0)); // `find` returns the key
break;

case A_INSERT:
Expand All @@ -534,10 +574,9 @@
case A_REMOVE:
//O: throw an error if /part is used?
n = Find_Entry(series, D_ARG(ARG_REMOVE_KEY_ARG), 0, TRUE);
if (n) {
n = (n-1)*2;
VAL_SET_OPT(VAL_BLK_SKIP(val, n), OPTS_HIDE);
VAL_SET(VAL_BLK_SKIP(val, n+1), REB_NONE); // set value to none (so the old one may be GCed)
if (n != NOT_FOUND) {
VAL_SET_OPT(VAL_BLK_SKIP(val, n-1), OPTS_HIDE); // hide the key
VAL_SET(VAL_BLK_SKIP(val, n), REB_NONE); // set value to none (so the old one may be GCed)
}
return R_ARG1;

Expand Down
1 change: 1 addition & 0 deletions src/include/reb-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ These are now obsolete (as of A107) and should be removed:
#include REBOL_OPTIONS_FILE
#endif

#define MIN_DICT 16 // Maximum number of keys in a map without hashing

//* Common *************************************************************

Expand Down

0 comments on commit da4718b

Please sign in to comment.