Description
Hi, I'm trying to use that C binding in SPDK.
I'm trying to test the movement before that,
Here is the code I use:
`#include <stdio.h>
#include <stdlib.h>
#include "tokenizers_c.h"
#include <string.h>
#include <assert.h>
size_t file_length(FILE *file){
assert(file);
fseek(file, 0, SEEK_END);
size_t length = ftell(file);
rewind(file);
return length;
}
//
char* read_file(const char* filename, size_t* out_size) {
FILE* file = fopen(filename, "r");
if (!file) {
printf("Error: Cannot open file: %s\n", filename);
return NULL;
}
out_size = file_length(file);
char buffer = (char*)malloc(*out_size + 1);
if (!buffer) {
printf("Memory allocation failed!\n");
fclose(file);
return NULL;
}
size_t read_bytes = fread(buffer, 1, *out_size, file);
fclose(file);
if (read_bytes != *out_size) {
printf("Error: File read mismatch (%zu != %zu)\n", read_bytes, *out_size);
free(buffer);
return NULL;
}
buffer[*out_size] = '\0'; //
return buffer;
}
int main() {
//
size_t vocab_len, merge_len;
char* vocab = read_file("./tokenizer_files/vocab.json", &vocab_len);
char* merges = read_file("./tokenizer_files/merges.json", &merge_len);
if (!vocab || !merges) {
printf("Error: Failed to load vocab or merges file.\n");
return 1;
}
printf("vocab.json & merges.json successfully loaded! (Vocab: %zu bytes, Merges: %zu bytes)\n", vocab_len, merge_len);
//
TokenizerHandle tokenizer = byte_level_bpe_tokenizers_new_from_str(vocab, vocab_len, merges, merge_len, NULL, 0);
free(vocab);
free(merges);
if (!tokenizer) {
printf("Tokenizer creation failed!\n");
return 1;
}
printf("Tokenizer successfully created!\n");
//
const char* test_sentence = "Hello, this is a BPE tokenizer test!";
TokenizerEncodeResult result;
tokenizers_encode(tokenizer, test_sentence, strlen(test_sentence), 1, &result);
if (!result.token_ids || result.len == 0) {
printf("❌ Tokenization failed!\n");
tokenizers_free(tokenizer);
return 1;
}
//
printf("Tokenized: ");
for (size_t i = 0; i < result.len; i++) {
printf("%d ", result.token_ids[i]);
}
printf("\n");
//
tokenizers_free_encode_results(&result, 1);
tokenizers_free(tokenizer);
printf(" Tokenizer cleanup completed.\n");
return 0;
}`
However, the following error occurs:
thread '<unnamed>' panicked at src/lib.rs:38:75: called
Result::unwrap()on an
Errvalue: Error("EOF while parsing a value", line: 1, column: 0) note: run with
RUST_BACKTRACE=1` environment variable to display a backtrace
thread '' panicked at core/src/panicking.rs:221:5:
panic in a function that cannot unwind
stack backtrace:
0: 0x7fae7cd9f1fa - std::backtrace_rs::backtrace::libunwind::trace::h5a5b8284f2d0c266
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/../../backtrace/src/backtrace/libunwind.rs:116:5
1: 0x7fae7cd9f1fa - std::backtrace_rs::backtrace::trace_unsynchronized::h76d4f1c9b0b875e3
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: 0x7fae7cd9f1fa - std::sys::backtrace::_print_fmt::hc4546b8364a537c6
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:66:9
3: 0x7fae7cd9f1fa - <std::sys::backtrace::BacktraceLock::print::DisplayBacktrace as core::fmt::Display>::fmt::h5b6bd5631a6d1f6b
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:39:26
4: 0x7fae7cded593 - core::fmt::rt::Argument::fmt::h270f6602a2b96f62
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/fmt/rt.rs:177:76
5: 0x7fae7cded593 - core::fmt::write::h7550c97b06c86515
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/fmt/mod.rs:1186:21
6: 0x7fae7cd935c3 - std::io::Write::write_fmt::h7b09c64fe0be9c84
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/io/mod.rs:1839:15
7: 0x7fae7cd9f042 - std::sys::backtrace::BacktraceLock::print::h2395ccd2c84ba3aa
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:42:9
8: 0x7fae7cda164a - std::panicking::default_hook::{{closure}}::he19d4c7230e07961
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:268:22
9: 0x7fae7cda1490 - std::panicking::default_hook::hf614597d3c67bbdb
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:295:9
10: 0x7fae7cda1c87 - std::panicking::rust_panic_with_hook::h8942133a8b252070
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:801:13
11: 0x7fae7cda1ae6 - std::panicking::begin_panic_handler::{{closure}}::hb5f5963570096b29
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:667:13
12: 0x7fae7cd9f6d9 - std::sys::backtrace::__rust_end_short_backtrace::h6208cedc1922feda
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:170:18
13: 0x7fae7cda17ac - rust_begin_unwind
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:665:5
14: 0x7fae7caa82dd - core::panicking::panic_nounwind_fmt::runtime::h1f507a806003dfb2
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:112:18
15: 0x7fae7caa82dd - core::panicking::panic_nounwind_fmt::h357fc035dc231634
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:122:5
16: 0x7fae7caa8372 - core::panicking::panic_nounwind::hd0dad372654c389a
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:221:5
17: 0x7fae7caa8536 - core::panicking::panic_cannot_unwind::h65aefd062253eb19
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:310:5
18: 0x7fae7cab1cbb - byte_level_bpe_tokenizers_new_from_str
19: 0x55d8368165ad - main
20: 0x7fae7c63e083 - __libc_start_main
at /build/glibc-FcRMwW/glibc-2.31/csu/../csu/libc-start.c:308:16
21: 0x55d8368162ae - _start
22: 0x0 -
thread caused non-unwinding panic. aborting.
`
I would like to know more about examples of using C language headers, or to get advice on errors. Thank you.