Skip to content

Commit

Permalink
cii: Add atom
Browse files Browse the repository at this point in the history
  • Loading branch information
XuShaohua committed Nov 10, 2023
1 parent a6022d3 commit 6b53c56
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cii/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/cmake-build-debug
/cmake-build-release
19 changes: 19 additions & 0 deletions cii/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Build script for the `cii` static library (C sources only).
cmake_minimum_required(VERSION 3.20)
project(cii C)

# Compile the C sources as C11.
set(CMAKE_C_STANDARD 11)

# Translation units compiled into the library.
set(SOURCE_FILES
src/atom.c
)
# Public headers; listed on the target alongside the sources.
set(HEADER_FILES
include/cii/atom.h
)

# The library target itself.
add_library(cii STATIC
${SOURCE_FILES}
${HEADER_FILES}
)
# PUBLIC: `include/` is on the include path both for this target and for
# targets that link against it, so headers are included as "cii/atom.h".
target_include_directories(cii
PUBLIC include
)
70 changes: 70 additions & 0 deletions cii/include/cii/atom.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) 2023 Xu Shaohua <[email protected]>. All rights reserved.
// Use of this source is governed by GNU General Public License
// that can be found in the LICENSE file.

#ifndef CII_ATOM_H_
#define CII_ATOM_H_

/**
* An atom is a pointer to a unique, immutable sequence of zero or more
* arbitrary bytes. Most atoms are null-terminated strings, but a pointer to
* any sequence of bytes can be an atom.
*
* There is only a single occurrence (instance) of any atom, which is why it's
* called an atom.
*
* Two atoms are identical if they point to the same location. Comparing two byte
* sequences for equality by simply comparing pointers is one of the advantages
* of atoms. Another advantage is that using atoms saves space because there's
* only one occurrence of each sequence.
*
* Once an atom is created, it exists for the duration of the client's execution.
* An atom is always terminated with a null character.
*/

#include <stdint.h>
#include <stddef.h>

/**
* Adds a copy of the byte sequence to the table of atoms, if necessary, and
* returns the pointer to that atom.
*
* Never returns a null pointer.
*
* @param str sequence of bytes; shall not be a null pointer.
* @param len number of bytes in |str|.
* @return the atom, which is a pointer to the copy of the sequence in the atom
* table.
*/
const char* atom_new(const char* str, size_t len);

/**
* Convenience wrapper around |atom_new| for the common case of using
* null-terminated character strings as atoms.
*
* Adds a copy of that string to the atom table, if necessary, and returns the
* pointer to that atom. The length handed to |atom_new| is strlen(str), so the
* terminating null character is not counted.
*
* @param str null-terminated string; shall not be a null pointer (checked
* with assert).
* @return the atom, which is a pointer to the copy of the sequence in the atom
* table.
*/
const char* atom_string(const char* str);

/**
* Returns the atom for the decimal string representation of the integer |n|.
* Negative values are prefixed with '-'.
*
* @param n integer value to be represented as a string.
* @return the atom, which is a pointer to the copy of the formatted integer in
* the atom table.
*/
const char* atom_int(int64_t n);

/**
* Returns the length of its atom argument.
*
* Raises an error if |str| is not a pointer to an atom.
* NOTE(review): the definition is not visible alongside this declaration;
* confirm how the error is reported (e.g. assert) once it is implemented.
*
* @param str pointer to an atom.
* @return the string length of the atom.
*/
size_t atom_length(const char* str);

#endif // CII_ATOM_H_
56 changes: 56 additions & 0 deletions cii/src/atom.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) 2023 Xu Shaohua <[email protected]>. All rights reserved.
// Use of this source is governed by GNU General Public License
// that can be found in the LICENSE file.

#include "cii/atom.h"

#include <assert.h>
#include <limits.h>
#include <string.h>

// One record of the atom table. Available both as |struct atom_s| and under
// the shorter alias |atom_t|.
typedef struct atom_s {
  // NOTE(review): presumably the next record in the same bucket chain;
  // confirm against atom_new once it is implemented.
  struct atom_s* link;
  // NOTE(review): presumably the number of bytes stored in |str|; confirm.
  size_t len;
  // NOTE(review): presumably the stored copy of the atom's bytes; confirm.
  char* str;
} atom_t;

// Number of slots in the atom table.
//
// Declared as an enumeration constant rather than a `const size_t` object:
// in C a const-qualified variable is not an integer constant expression, so
// using it as the bound of a file-scope array would declare a variably
// modified type at file scope, which the language forbids.
enum { kBucketCap = 2048 };

// Storage for the atom table. Having static storage duration, every slot
// starts out zero-initialized (null |link| / |str|, zero |len|).
static atom_t g_buckets[kBucketCap];

// Returns the atom for the null-terminated string |str|.
//
// The byte count is measured with strlen(), so the terminating null character
// is not part of the length handed to atom_new(). |str| must not be a null
// pointer; this is checked with assert().
const char* atom_string(const char* str) {
  assert(str != NULL);

  const size_t length = strlen(str);
  return atom_new(str, length);
}

// Returns the atom for the decimal string representation of |n|.
//
// Negative values are prefixed with '-'; zero is rendered as "0". The digits
// are produced right-to-left into a local buffer and the resulting
// [s, end_buf) range is handed to atom_new().
const char* atom_int(int64_t n) {
  // Compute |n|'s magnitude entirely in unsigned arithmetic. Converting a
  // negative value to uint64_t wraps modulo 2^64, and subtracting that from 0
  // yields the magnitude. This is well defined for every input, including
  // INT64_MIN, and does not depend on the width of `unsigned long`.
  const uint64_t bits = (uint64_t)n;
  uint64_t m = (n < 0) ? (uint64_t)0 - bits : bits;

  // An int64_t needs at most 20 characters (sign + 19 digits); 43 characters
  // would even hold the decimal representation of a 128-bit integer.
  // An enumeration constant keeps |buf| an ordinary fixed-size array instead
  // of a variable length array.
  enum { kBufLen = 43 };
  char buf[kBufLen];
  char* const end_buf = buf + kBufLen;
  char* s = end_buf;

  // do/while so that n == 0 still emits a single '0' digit.
  do {
    s -= 1;
    *s = (char)('0' + (int)(m % 10));
    m /= 10;
  } while (m > 0);

  if (n < 0) {
    s -= 1;
    *s = '-';
  }
  assert(s >= buf);

  return atom_new(s, (size_t)(end_buf - s));
}


0 comments on commit 6b53c56

Please sign in to comment.