diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..d2c5772 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,5 @@ +language: c + +script: + - make + - make test diff --git a/README.md b/README.md index 4c8af4c..3f8e03b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ -Netstring parsing -================= +Netstring for C +=============== +[![Build Status](https://travis-ci.org/liteserver/netstring-c.svg?branch=master)](https://travis-ci.org/liteserver/netstring-c) A [netstring](http://en.wikipedia.org/wiki/Netstring) is a way of encoding a sequence of bytes for transmission over a network, or for serialization. They're very easy to work with. They encode the data's length, and can be concatenated trivially. The format was [defined by D. J. Bernstein](http://cr.yp.to/proto/netstrings.txt) and is used in various software. Examples of netstrings: @@ -18,19 +19,65 @@ Basic API All the code is in `netstring.c` and `netstring.h`, and these have no external dependencies. To use them, just include them in your application. Include `netstring.h` and link with the C code. -**Parsing netstrings** +Creating netstrings +------------------- -To parse a netstring, use `netstring_read()`. 
+You can create a netstring manually, as in this example: - int netstring_read(char *buffer, size_t buffer_length, - char **netstring_start, size_t *netstring_length); +```C +sprintf(buf, "%lu:%s,", (unsigned long)strlen(str), str); +``` + +This code provides a convenience function for creating netstrings: + +```C +size_t netstring_add(char **netstring, char *data); +``` + +Here is how to use it: + +```C +char *netstring=0; /* we must initialize it to zero */ + +netstring_add(&netstring, "first"); +netstring_add(&netstring, "second"); +netstring_add(&netstring, "third"); + +do_something(netstring); + +free(netstring); /* we must free after using it */ +``` + +The extended version `netstring_add_ex` accepts a string length as the last argument: + +```C +size_t netstring_add_ex(char **netstring, char *data, size_t len); +``` + +This appends a netstring containing the first `len` bytes of `data` to `*netstring`, allocating or growing the buffer as needed. If `len` is 0 then no data will be read from `data`, and it may be null. + +Parsing netstrings +------------------ + +To parse a netstring use `netstring_read()`: + +```C +int netstring_read(char **buffer_start, size_t *buffer_length, + char **netstring_start, size_t *netstring_length); +``` + +It reads a netstring from `buffer_start` of initial `buffer_length` and writes +to `netstring_start` a pointer to the beginning of the string in the +buffer and to `netstring_length` the length of the string. It also updates +the `buffer_start` to the start of the next netstring item and `buffer_length` +to the number of remaining bytes not processed in the buffer. + +It does not allocate any memory. -Reads a netstring from a `buffer` of length `buffer_length`. Writes to -`netstring_start` a pointer to the beginning of the string in the -buffer, and to `netstring_length` the length of the string. Does not -allocate any memory. If it reads successfully, then it returns 0. If -there is an error, then the return value will be negative. 
The error -values are: +### Return Value + +If it reads successfully then it returns 0. If there is an error then the +return value will be negative. The error values are: NETSTRING_ERROR_TOO_LONG More than 999999999 bytes in a field NETSTRING_ERROR_NO_COLON No colon was found after the number @@ -39,27 +86,82 @@ values are: NETSTRING_ERROR_LEADING_ZERO Leading zeros are not allowed NETSTRING_ERROR_NO_LENGTH Length not given at start of netstring -If you're sending messages with more than 999999999 bytes -- about 2 -GB -- then you probably should not be doing so in the form of a single +Usage Example: + +```C +char *str, *base = buffer; +size_t len, size = bytes_read; + +while(netstring_read(&base, &size, &str, &len) == 0) { + do_something(str, len); +} +``` + +We can replace the comma with a null terminator when reading (zero copy): + +```C +while(netstring_read(&base, &size, &str, &len) == 0) { + str[len] = 0; + puts(str); + str[len] = ','; /* and optionally restore it */ +} +``` + +If you're sending messages with more than 999999999 bytes (about 1 +GB) then you probably should not be doing so in the form of a single netstring. This restriction is in place partially to protect from malicious or erroneous input, and partly to be compatible with D. J. Bernstein's reference implementation. -**Creating netstrings** +Message Framing on stream-based connections (sockets, pipes...) +--------------------------------------------------------------- -To create a netstring, there are a few ways to do it. You could do something really simple like this example from [the spec](http://cr.yp.to/proto/netstrings.txt): +On stream-based connections the messages can arrive coalesced or fragmented. 
+ - if (printf("%lu:", len) < 0) barf(); - if (fwrite(buf, 1, len, stdout) < len) barf(); - if (putchar(',') < 0) barf(); - -This code provides a convenience function for creating a new netstring: +Here is an example of reading those messages using netstring for message framing: +```C +char buffer[1024], *buffer_base, *str; +int bytes_read; size_t buffer_used = 0, len; + +while(1) { + /* read data from socket */ + bytes_read = recv(sock, &buffer[buffer_used], sizeof(buffer) - buffer_used, 0); + if (bytes_read <= 0) break; /* error, or the peer closed the connection */ + buffer_used += bytes_read; + + /* parse the strings from the read buffer */ + buffer_base = buffer; + while(netstring_read(&buffer_base, &buffer_used, &str, &len) == 0) { + do_something(str, len); + } + + /* if there are remaining bytes, move to the beginning of buffer */ + if (buffer_base > buffer && buffer_used > 0) + memmove(buffer, buffer_base, buffer_used); +} +``` +Note: this example does not check the error code returned by `netstring_read`, and it does not allocate more memory for messages larger than the buffer. + +Additional Functions +-------------------- + +### netstring_list_size + +Retrieves the size of the netstring list (concatenated netstrings) discarding trailing spaces. + +```C +int netstring_list_size(char *buffer, size_t size, size_t *ptotal); +``` + +### netstring_list_count - size_t netstring_encode_new(char **netstring, char *data, size_t len); +Retrieves the number of items in a netstring list. -This allocates and creates a netstring containing the first `len` bytes of `data`. This must be manually freed by the client. If `len` is 0 then no data will be read from `data`, and it may be null. +```C +int netstring_list_count(char *buffer, size_t size, int *pcount); +``` Contributing ------------ -All this code is Public Domain. 
If you want to contribute, you can send bug reports, or fork the project on GitHub. Contributions are welcomed with open arms. diff --git a/makefile b/makefile index 180c501..f71d417 100644 --- a/makefile +++ b/makefile @@ -1,5 +1,4 @@ CFLAGS = -O2 -Wall -LDFLAGS= -lm .PHONY: test clean diff --git a/netstring.c b/netstring.c index b8e48f0..fc7892c 100644 --- a/netstring.c +++ b/netstring.c @@ -4,7 +4,6 @@ #include #include #include -#include #include "netstring.h" /* Reads a netstring from a `buffer` of length `buffer_length`. Writes @@ -28,12 +27,14 @@ D. J. Bernstein's reference implementation. Example: - if (netstring_read("3:foo,", 6, &str, &len) < 0) explode_and_die(); + if (netstring_read(&buf, &buflen, &str, &len) < 0) failed(); */ -int netstring_read(char *buffer, size_t buffer_length, - char **netstring_start, size_t *netstring_length) { +int netstring_read(char **pbuffer, size_t *pbuffer_length, + char **netstring_start, size_t *netstring_length) { int i; size_t len = 0; + char *buffer = *pbuffer; + size_t buffer_length = *pbuffer_length; /* Write default values for outputs */ *netstring_start = NULL; *netstring_length = 0; @@ -64,41 +65,100 @@ int netstring_read(char *buffer, size_t buffer_length, /* Read the colon */ if (buffer[i++] != ':') return NETSTRING_ERROR_NO_COLON; - /* Test for the trailing comma, and set the return values */ + /* Test for the trailing comma */ if (buffer[i + len] != ',') return NETSTRING_ERROR_NO_COMMA; - *netstring_start = &buffer[i]; *netstring_length = len; + + /* Set the return values */ + *netstring_start = &buffer[i]; + *netstring_length = len; + *pbuffer = *netstring_start + len + 1; + *pbuffer_length = buffer_length - (i + len + 1); return 0; } +/* Retrieves the size of the concatenated netstrings: stores in *ptotal the number of bytes of `buffer` consumed by the netstrings that were read successfully. Returns 0 on success or a negative NETSTRING_ERROR_* code. */ +int netstring_list_size(char *buffer, size_t size, size_t *ptotal) { + char *str, *base = buffer; + size_t len, remaining = size; + int rc = 0; /* must be defined when size is 0 and the loop never assigns it */ + + while( remaining>0 && (rc=netstring_read(&base, &remaining, &str, &len))==0 
){ + } + + if( rc==NETSTRING_ERROR_NO_LENGTH || rc==NETSTRING_ERROR_TOO_SHORT ) rc = 0; + *ptotal = size - remaining; + return rc; +} + +/* Retrieves the number of concatenated netstrings: stores in *pcount how many netstrings were read successfully from the start of `buffer`. Parsing that stops with NO_LENGTH or TOO_SHORT is reported as success (0); any other failure returns its negative NETSTRING_ERROR_* code. */ +int netstring_list_count(char *buffer, size_t size, int *pcount) { + char *str, *base = buffer; + size_t len, remaining = size; + int rc = 0, count = 0; /* rc must be defined when size is 0 and the loop never assigns it */ + + while( remaining>0 && (rc=netstring_read(&base, &remaining, &str, &len))==0 ){ + count++; + } + + if( rc==NETSTRING_ERROR_NO_LENGTH || rc==NETSTRING_ERROR_TOO_SHORT ) rc = 0; + *pcount = count; + return rc; +} + +/* count the number of digits (base 10) in a positive integer */ +int numdigits(size_t len) { + int n = 1; + if ( len >= 100000000 ) { n += 8; len /= 100000000; } + if ( len >= 10000 ) { n += 4; len /= 10000; } + if ( len >= 100 ) { n += 2; len /= 100; } + if ( len >= 10 ) { n += 1; } + return n; +} + /* Return the length, in ASCII characters, of a netstring containing `data_length` bytes. */ size_t netstring_buffer_size(size_t data_length) { - if (data_length == 0) return 3; - return (size_t)ceil(log10((double)data_length + 1)) + data_length + 2; + return (size_t)numdigits(data_length) + data_length + 2; } /* Allocate and create a netstring containing the first `len` bytes of `data`. This must be manually freed by the client. If `len` is 0 - then no data will be read from `data`, and it may be NULL. 
+ Returns the netstring size not including the null terminator */ +size_t netstring_add_ex(char **netstring, char *data, size_t len) { + size_t num_len, size_prev=0, size_next; + char *ptr; + + if (netstring == 0 || (len > 0 && data == 0)) return 0; + + num_len = numdigits(len); + size_next = num_len + len + 2; + + if (*netstring == 0) { + ptr = malloc(size_next + 1); + if (ptr == 0) return 0; + *netstring = ptr; + } else { + size_prev = strlen(*netstring); + ptr = realloc(*netstring, size_prev + size_next + 1); + if (ptr == 0) return 0; + *netstring = ptr; + ptr += size_prev; + } if (len == 0) { - ns = malloc(3); - ns[0] = '0'; - ns[1] = ':'; - ns[2] = ','; + strcpy(ptr, "0:,"); } else { - num_len = (size_t)ceil(log10((double)len + 1)); - ns = malloc(num_len + len + 2); - sprintf(ns, "%lu:", (unsigned long)len); - memcpy(ns + num_len + 1, data, len); - ns[num_len + len + 1] = ','; + sprintf(ptr, "%lu:", (unsigned long)len); + ptr += num_len + 1; + memcpy(ptr, data, len); + ptr += len; *ptr = ','; + ptr++; *ptr = 0; } + return size_prev + size_next; +} - *netstring = ns; - return num_len + len + 2; +size_t netstring_add(char **netstring, char *data) { + return netstring_add_ex(netstring, data, strlen(data)); } - diff --git a/netstring.h b/netstring.h index cb5564a..f31343d 100644 --- a/netstring.h +++ b/netstring.h @@ -3,12 +3,16 @@ #include -int netstring_read(char *buffer, size_t buffer_length, - char **netstring_start, size_t *netstring_length); +size_t netstring_add(char **netstring, char *data); +size_t netstring_add_ex(char **netstring, char *data, size_t len); + +int netstring_read(char **buffer_start, size_t *buffer_length, + char **netstring_start, size_t *netstring_length); size_t netstring_buffer_size(size_t data_length); -size_t netstring_encode_new(char **netstring, char *data, size_t len); +int netstring_list_size(char *buffer, size_t size, size_t *ptotal); +int netstring_list_count(char *buffer, size_t size, int *pcount); /* Errors that can occur 
during netstring parsing */ #define NETSTRING_ERROR_TOO_LONG -1 diff --git a/testsuite.c b/testsuite.c index b377e86..083bfbd 100644 --- a/testsuite.c +++ b/testsuite.c @@ -9,6 +9,7 @@ /* Good examples */ char ex1[] = "12:hello world!,"; char ex2[] = "3:foo,0:,3:bar,"; +char exb[] = "3:foo,0:,3:bar, "; /* Bad examples */ char ex3[] = "12:hello world! "; /* No comma */ @@ -21,66 +22,99 @@ char ex9[] = ":what's up"; /* No number */ void test_netstring_read(void) { - char *netstring; + char *base; + size_t base_len; + char *netstring; size_t netstring_len; int retval; /* ex1: hello world */ - retval = netstring_read(ex1, strlen(ex1), &netstring, &netstring_len); + base = ex1; base_len = strlen(ex1); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 12); assert(strncmp(netstring, "hello world!", 12) == 0); + assert(base == ex1 + strlen(ex1)); assert(base_len == 0); assert(retval == 0); /* ex2: three netstrings, concatenated. */ - retval = netstring_read(ex2, strlen(ex2), &netstring, &netstring_len); + base = ex2; base_len = strlen(ex2); + + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 3); assert(strncmp(netstring, "foo", 3) == 0); assert(retval == 0); - retval = netstring_read(netstring+netstring_len+1, 9, &netstring, &netstring_len); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); assert(retval == 0); - retval = netstring_read(netstring+netstring_len+1, 6, &netstring, &netstring_len); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 3); assert(strncmp(netstring, "bar", 3) == 0); - assert(retval == 0); + assert(retval == 0); assert(base_len == 0); /* ex3: no comma */ - retval = netstring_read(ex3, strlen(ex3), &netstring, &netstring_len); + base = ex3; base_len = strlen(ex3); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); 
assert(netstring == NULL); assert(retval == NETSTRING_ERROR_NO_COMMA); /* ex4: too short */ - retval = netstring_read(ex4, strlen(ex4), &netstring, &netstring_len); + base = ex4; base_len = strlen(ex4); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); assert(netstring == NULL); assert(retval == NETSTRING_ERROR_TOO_SHORT); /* ex5: leading zero */ - retval = netstring_read(ex5, strlen(ex5), &netstring, &netstring_len); + base = ex5; base_len = strlen(ex5); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); assert(netstring == NULL); assert(retval == NETSTRING_ERROR_LEADING_ZERO); /* ex6: too long */ - retval = netstring_read(ex6, strlen(ex6), &netstring, &netstring_len); + base = ex6; base_len = strlen(ex6); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); assert(netstring == NULL); assert(retval == NETSTRING_ERROR_TOO_LONG); /* ex7: no colon */ - retval = netstring_read(ex7, strlen(ex7), &netstring, &netstring_len); + base = ex7; base_len = strlen(ex7); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); assert(netstring == NULL); assert(retval == NETSTRING_ERROR_NO_COLON); /* ex8: no number or colon */ - retval = netstring_read(ex8, strlen(ex8), &netstring, &netstring_len); + base = ex8; base_len = strlen(ex8); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); assert(netstring == NULL); assert(retval == NETSTRING_ERROR_NO_LENGTH); /* ex9: no number */ - retval = netstring_read(ex9, strlen(ex9), &netstring, &netstring_len); + base = ex9; base_len = strlen(ex9); + retval = netstring_read(&base, &base_len, &netstring, &netstring_len); assert(netstring_len == 0); assert(netstring == NULL); assert(retval == NETSTRING_ERROR_NO_LENGTH); } +void test_netstring_list_size(void) { + size_t size=0; + 
assert(netstring_list_size(ex1, strlen(ex1), &size) == 0); + assert(size == strlen(ex1)); + assert(netstring_list_size(ex2, strlen(ex2), &size) == 0); + assert(size == strlen(ex2)); + assert(netstring_list_size(exb, strlen(exb), &size) == 0); + assert(size == strlen(ex2)); +} + +void test_netstring_list_count(void) { + int count=0; + assert(netstring_list_count(ex1, strlen(ex1), &count) == 0); + assert(count == 1); + assert(netstring_list_count(ex2, strlen(ex2), &count) == 0); + assert(count == 3); + assert(netstring_list_count(exb, strlen(exb), &count) == 0); + assert(count == 3); +} + void test_netstring_buffer_size(void) { assert(netstring_buffer_size(0) == 3); assert(netstring_buffer_size(1) == 4); @@ -90,27 +124,46 @@ void test_netstring_buffer_size(void) { assert(netstring_buffer_size(12345) == 12345 + 5 + 2); } -void test_netstring_encode_new(void) { - char *ns; size_t bytes; +void test_netstring_add_ex(void) { + char *ns=0; size_t bytes; - bytes = netstring_encode_new(&ns, "foo", 3); + bytes = netstring_add_ex(&ns, "foo", 3); assert(ns != NULL); assert(strncmp(ns, "3:foo,", 6) == 0); assert(bytes == 6); - free(ns); + free(ns); ns = 0; - bytes = netstring_encode_new(&ns, NULL, 0); + bytes = netstring_add_ex(&ns, NULL, 0); assert(ns != NULL); assert(strncmp(ns, "0:,", 3) == 0); assert(bytes == 3); - free(ns); + free(ns); ns = 0; - bytes = netstring_encode_new(&ns, "hello world!", 12); assert(bytes == 16); + bytes = netstring_add_ex(&ns, "hello world!", 12); assert(bytes == 16); assert(ns != NULL); assert(strncmp(ns, "12:hello world!,", 16) == 0); - free(ns); -} + free(ns); ns = 0; + + bytes = netstring_add_ex(&ns, "hello world!", 5); assert(bytes == 8); + assert(ns != NULL); assert(strncmp(ns, "5:hello,", 8) == 0); + free(ns); ns = 0; +} + +void test_netstring_add(void) { + char *list=0; + + netstring_add(&list, "first"); + netstring_add(&list, "second"); + netstring_add(&list, "third"); + netstring_add(&list, ""); + assert(strcmp(list, 
"5:first,6:second,5:third,0:,") == 0); + free(list); + +} int main(void) { printf("Running test suite...\n"); test_netstring_read(); + test_netstring_list_size(); + test_netstring_list_count(); test_netstring_buffer_size(); - test_netstring_encode_new(); + test_netstring_add_ex(); + test_netstring_add(); printf("All tests passed!\n"); return 0; }