From 6b53c5655277f2c00f27f826d2ed7e6c6c722e9a Mon Sep 17 00:00:00 2001 From: Xu Shaohua Date: Fri, 10 Nov 2023 11:41:35 +0800 Subject: [PATCH] cii: Add atom --- cii/.gitignore | 2 ++ cii/CMakeLists.txt | 19 ++++++++++++ cii/include/cii/atom.h | 70 ++++++++++++++++++++++++++++++++++++++++++ cii/src/atom.c | 56 +++++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+) create mode 100644 cii/.gitignore create mode 100644 cii/CMakeLists.txt create mode 100644 cii/include/cii/atom.h create mode 100644 cii/src/atom.c diff --git a/cii/.gitignore b/cii/.gitignore new file mode 100644 index 00000000..999ff7b0 --- /dev/null +++ b/cii/.gitignore @@ -0,0 +1,2 @@ +/cmake-build-debug +/cmake-build-release \ No newline at end of file diff --git a/cii/CMakeLists.txt b/cii/CMakeLists.txt new file mode 100644 index 00000000..1503bf89 --- /dev/null +++ b/cii/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.20) +project(cii C) + +set(CMAKE_C_STANDARD 11) + +set(SOURCE_FILES + src/atom.c + ) +set(HEADER_FILES + include/cii/atom.h + ) + +add_library(cii STATIC + ${SOURCE_FILES} + ${HEADER_FILES} + ) +target_include_directories(cii + PUBLIC include + ) \ No newline at end of file diff --git a/cii/include/cii/atom.h b/cii/include/cii/atom.h new file mode 100644 index 00000000..3925f617 --- /dev/null +++ b/cii/include/cii/atom.h @@ -0,0 +1,70 @@ +// Copyright (c) 2023 Xu Shaohua . All rights reserved. +// Use of this source is governed by GNU General Public License +// that can be found in the LICENSE file. + +#ifndef CII_ATOM_H_ +#define CII_ATOM_H_ + +/** + * An atom is a pointer to a unique, immutable sequence of zero or more + * arbitrary bytes. Most atoms are null-terminated strings, but a pointer to + * any sequence of bytes can be an atom. + * + * There is only a single occurrence(instance) of any atom, which is why it's + * called an atom. + * + * Two atoms are identical if they point to the same location. 
Comparing two byte
+ * sequences for equality by simply comparing pointers is one of the advantages
+ * of atoms. Another advantage is that using atoms saves space because there's
+ * only one occurrence of each sequence.
+ *
+ * Once an atom is created, it exists for the duration of the client's execution.
+ * An atom is always terminated with a null character.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Adds a copy of the byte sequence to the table of atoms, if necessary, and
+ * returns the pointer to that atom.
+ *
+ * Never returns a null pointer.
+ *
+ * @param str sequence of bytes, shall not be a null pointer.
+ * @param len number of bytes in |str|.
+ * @return the atom, which is a pointer to the copy of the sequence in the atom
+ * table.
+ */
+const char* atom_new(const char* str, size_t len);
+
+/**
+ * Like |atom_new|, but caters to the common use of character strings as atoms.
+ *
+ * Adds a copy of that string to the atom table, if necessary, and returns the
+ * pointer to that atom.
+ * @param str null-terminated string, shall not be a null pointer.
+ * @return the atom, which is a pointer to the copy of the sequence in the atom
+ * table.
+ */
+const char* atom_string(const char* str);
+
+/**
+ * Returns the atom for the decimal string representation of the integer |n|.
+ * @param n integer value to be represented as a string.
+ * @return the atom, which is a pointer to the copy of that string in the atom
+ * table.
+ */
+const char* atom_int(int64_t n);
+
+/**
+ * Returns the length of its atom argument.
+ *
+ * Will raise an error if |str| is not a pointer to an atom.
+ *
+ * @param str pointer to an atom.
+ * @return the string length of the atom.
+ */
+size_t atom_length(const char* str);
+
+#endif  // CII_ATOM_H_
diff --git a/cii/src/atom.c b/cii/src/atom.c
new file mode 100644
index 00000000..74565a11
--- /dev/null
+++ b/cii/src/atom.c
@@ -0,0 +1,56 @@
+// Copyright (c) 2023 Xu Shaohua . All rights reserved.
+// Use of this source is governed by GNU General Public License
+// that can be found in the LICENSE file.
+
+#include "cii/atom.h"
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+// Atom table entry; |link| presumably chains entries sharing a bucket.
+struct atom_s {
+  struct atom_s* link;
+  size_t len;
+  char* str;
+};
+
+typedef struct atom_s atom_t;
+
+// An enum constant is an integer constant expression, which a file-scope
+// array bound requires; a `const size_t` object is not one in C.
+enum { kBucketCap = 2048 };
+static atom_t g_buckets[kBucketCap];
+
+// Returns atom_new() of |str|, using strlen(str) as the byte count.
+const char* atom_string(const char* str) {
+  assert(str != NULL);
+  return atom_new(str, strlen(str));
+}
+
+// Formats |n| in decimal, right to left, and returns the atom for that text.
+const char* atom_int(int64_t n) {
+  // Negate in unsigned arithmetic so that INT64_MIN needs no special case
+  // and no signed overflow can occur.
+  uint64_t m = (n < 0) ? (uint64_t)0 - (uint64_t)n : (uint64_t)n;
+
+  // 43 characters can hold decimal representation of 128-bit integers.
+  enum { kBufLen = 43 };
+  char buf[kBufLen];
+  char* end_buf = buf + kBufLen;
+  char* s = end_buf;
+  // do-while emits at least one digit, so 0 becomes "0" rather than "".
+  do {
+    s -= 1;
+    *s = (char)(m % 10 + '0');
+    m /= 10;
+  } while (m > 0);
+  if (n < 0) {
+    s -= 1;
+    *s = '-';
+  }
+  assert(s >= buf);
+
+  return atom_new(s, (size_t)(end_buf - s));
+}
+