/*
 * Compare two strings so that embedded numbers are ordered by value, in the
 * manner of GNU strverscmp().
 *
 * The strings are compared byte by byte (as unsigned characters) up to the
 * first difference.  If that difference lies inside, or at the edge of, a
 * digit sequence present in BOTH strings, the sequences decide the result:
 *
 *   - sequences without a leading zero are compared as integers, so the
 *     one with more digits is bigger ("jan9" < "jan10");
 *   - sequences sharing a leading zero are compared as fractions, so the
 *     one with more digits is smaller ("000" < "00" < "01" < "0").
 *
 * In every other case, including a digit facing a non-digit or the end of
 * the string, the result is what strcmp() would return ("a" < "a0").
 *
 * Returns a negative value if a sorts before b, zero if the strings are
 * equal, and a positive value if a sorts after b.
 */
static int
strverscmp(
    const char *a, const char *b)
{
    size_t i = 0;
    size_t j;

    /* Find first difference */
    while (a[i] != '\0' && a[i] == b[i]) {
        ++i;
    }
    if (a[i] == b[i]) {
        /* No difference found */
        return 0;
    }

    /*
     * Count backwards and find the leftmost digit of the digit sequence
     * ending at the difference.  Everything before position i is common to
     * both strings, so inspecting one string is enough.
     *
     * Characters are cast to unsigned char before being handed to
     * isdigit(): file names may contain bytes above 127, and passing a
     * negative char to a <ctype.h> function is undefined behaviour.
     */
    j = i;
    while (j > 0 && isdigit ((unsigned char) a[j-1])) {
        --j;
    }

    /* Determine mode of comparison */
    if (a[j] == '0' || b[j] == '0') {
        /*
         * Zero mode applies only when the strings SHARE a leading zero
         * (j < i).  A zero that first appears at the difference itself has
         * no counterpart digit sequence in the other string and must be
         * ordered as in strcmp(), e.g. "a" < "a0" and "1" > "09".
         */
        if (j < i) {
            size_t k = j;

            /* Skip the zeros common to both strings */
            while (k < i && a[k] == '0') {
                k++;
            }

            /*
             * If only zeros were shared, the string whose digit sequence
             * goes on is smaller, e.g. 002 < 00 and 01 < 0.  Once a
             * non-zero digit has been shared, plain comparison applies.
             */
            if (k == i) {
                int a_digit = isdigit ((unsigned char) a[i]) != 0;
                int b_digit = isdigit ((unsigned char) b[i]) != 0;

                if (a_digit && !b_digit) {
                    return -1;
                }
                if (!a_digit && b_digit) {
                    return 1;
                }
            }
        }
    } else if (isdigit ((unsigned char) a[j])
        && isdigit ((unsigned char) b[j])) {
        /* Numeric comparison: digits before i are equal, start at i */
        size_t k = i;

        /* Find the first position where either number ends */
        while (isdigit ((unsigned char) a[k])
            && isdigit ((unsigned char) b[k])) {
            k++;
        }

        /* Number with greater number of digits is bigger */
        if (isdigit ((unsigned char) a[k])) {
            return 1;
        }
        if (isdigit ((unsigned char) b[k])) {
            return -1;
        }
    }

    /* Alphabetical comparison */
    return (int) ((unsigned char) a[i]) - ((unsigned char) b[i]);
}
assert (strcmp (files[10]->d_name, "sane-1.2.30.dat") == 0); assert (strcmp (files[11]->d_name, "sane-1.2.4.dat") == 0); assert (strcmp (files[12]->d_name, "zebra.dat") == 0); @@ -96,7 +99,7 @@ main( /* Make sure that we got all the FILE names in the REVERSE order */ assert (strcmp (files[0]->d_name, "zebra.dat") == 0); assert (strcmp (files[1]->d_name, "sane-1.2.4.dat") == 0); - assert (strcmp (files[2]->d_name, "sane-1.2.3.dat") == 0); + assert (strcmp (files[2]->d_name, "sane-1.2.30.dat") == 0); assert (strcmp (files[3]->d_name, "sane-1.12.0.dat") == 0); assert (strcmp (files[4]->d_name, "empty.dat") == 0); assert (strcmp (files[5]->d_name, "dirent.dat") == 0); @@ -131,6 +134,35 @@ main( assert (errno == ENOTDIR); } + /* Sort files using versionsort() */ + { + files = NULL; + n = scandir ("tests/3", &files, no_directories, versionsort); + assert (n == 11); + + /* + * Make sure that we got all the file names in the proper order: + * 1.2.4 < 1.2.30 < 1.12.0 + */ + assert (strcmp (files[0]->d_name, "3zero.dat") == 0); + assert (strcmp (files[1]->d_name, "666.dat") == 0); + assert (strcmp (files[2]->d_name, "Qwerty-my-aunt.dat") == 0); + assert (strcmp (files[3]->d_name, "README.txt") == 0); + assert (strcmp (files[4]->d_name, "aaa.dat") == 0); + assert (strcmp (files[5]->d_name, "dirent.dat") == 0); + assert (strcmp (files[6]->d_name, "empty.dat") == 0); + assert (strcmp (files[7]->d_name, "sane-1.2.4.dat") == 0); + assert (strcmp (files[8]->d_name, "sane-1.2.30.dat") == 0); + assert (strcmp (files[9]->d_name, "sane-1.12.0.dat") == 0); + assert (strcmp (files[10]->d_name, "zebra.dat") == 0); + + /* Release file names */ + for (i = 0; i < n; i++) { + free (files[i]); + } + free (files); + } + /* Scan large directory */ { char dirname[PATH_MAX+1]; diff --git a/tests/t-strverscmp.c b/tests/t-strverscmp.c new file mode 100644 index 0000000..0683a07 --- /dev/null +++ b/tests/t-strverscmp.c @@ -0,0 +1,186 @@ +/* + * Test program to make sure that strverscmp works 
/*
 * Test program to make sure that strverscmp works correctly
 *
 * Copyright (C) 1998-2019 Toni Ronkko
 * This file is part of dirent.  Dirent may be freely distributed
 * under the MIT license.  For all details and documentation, see
 * https://github.com/tronkko/dirent
 */

/* Include prototype for strverscmp */
#define _GNU_SOURCE

/*
 * NOTE(review): the header names of the original #include lines were lost.
 * The list below is what the code in this file visibly needs (assert,
 * printf, rand/EXIT_SUCCESS, strlen) plus dirent.h, which presumably
 * provides strverscmp() where the C library does not -- confirm against the
 * other test programs.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>

/*
 * Exercise strverscmp() with a fixed set of string pairs and then with a
 * large number of random, identical strings.  Prints "OK" and returns
 * EXIT_SUCCESS when every assertion holds; a failed assertion aborts.
 */
int
main(
    int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    /* Strings without digits are compared as in strcmp() */
    assert (strverscmp ("", "") == 0);
    assert (strverscmp ("abc", "abc") == 0);
    assert (strverscmp ("a", "b") < 0);
    assert (strverscmp ("b", "a") > 0);

    /* Shorter string is smaller, other things being equal */
    assert (strverscmp ("a", "aa") < 0);
    assert (strverscmp ("aa", "a") > 0);
    assert (strverscmp ("abcdef", "abcdefg") < 0);
    assert (strverscmp ("abcdefg", "abcdef") > 0);

    /* Integers with equal length are compared as in strcmp() */
    assert (strverscmp ("0", "0") == 0);
    assert (strverscmp ("000", "000") == 0);
    assert (strverscmp ("1", "2") < 0);
    assert (strverscmp ("2", "1") > 0);
    assert (strverscmp ("001", "100") < 0);
    assert (strverscmp ("100", "001") > 0);
    assert (strverscmp ("2020-07-01", "2020-07-02") < 0);
    assert (strverscmp ("2020-07-02", "2020-07-01") > 0);
    assert (strverscmp ("jan999", "jan999") == 0);

    /* Integers of different length are compared as numbers */
    assert (strverscmp ("jan9", "jan10") < 0);
    assert (strverscmp ("jan10", "jan9") > 0);
    assert (strverscmp ("999", "1000") < 0);
    assert (strverscmp ("1000", "999") > 0);
    assert (strverscmp ("t12-1000", "t12-9999") < 0);
    assert (strverscmp ("t12-9999", "t12-1000") > 0);

    /* If a number starts with zero, then the longer number is smaller */
    assert (strverscmp ("00", "0") < 0);
    assert (strverscmp ("0", "00") > 0);
    assert (strverscmp ("a000", "a00") < 0);
    assert (strverscmp ("a00", "a000") > 0);
    assert (strverscmp ("0000", "000") < 0);
    assert (strverscmp ("000", "0000") > 0);
    assert (strverscmp ("1.01", "1.0") < 0);
    assert (strverscmp ("1.0", "1.01") > 0);

    /* Number having more leading zeros is considered smaller */
    assert (strverscmp ("item-0001", "item-001") < 0);
    assert (strverscmp ("item-001", "item-0001") > 0);
    assert (strverscmp ("item-001", "item-01") < 0);
    assert (strverscmp ("item-01", "item-001") > 0);
    assert (strverscmp (".0001000", ".001") < 0);
    assert (strverscmp (".001", ".0001000") > 0);
    assert (strverscmp (".0001000", ".01") < 0);
    assert (strverscmp (".01", ".0001000") > 0);
    assert (strverscmp (".0001000", ".1") < 0);
    assert (strverscmp (".1", ".0001000") > 0);
    assert (strverscmp ("1.0002", "1.0010000") < 0);
    assert (strverscmp ("1.0010000", "1.0002") > 0);

    /* Number starting with zero is smaller than any number */
    assert (strverscmp ("item-009", "item-1") < 0);
    assert (strverscmp ("item-1", "item-009") > 0);
    assert (strverscmp ("item-099", "item-2") < 0);
    assert (strverscmp ("item-2", "item-099") > 0);

    /* Number vs alphabetical comparison */
    assert (strverscmp ("1.001", "1.00!") < 0);
    assert (strverscmp ("1.00!", "1.001") > 0);
    assert (strverscmp ("1.001", "1.00x") < 0);
    assert (strverscmp ("1.00x", "1.001") > 0);
    assert (strverscmp ("1", "x") < 0);
    assert (strverscmp ("x", "1") > 0);
    assert (strverscmp ("1", "!") > 0);
    assert (strverscmp ("!", "1") < 0);

    /* Handling the end of string */
    assert (strverscmp ("01", "011") < 0);
    assert (strverscmp ("011", "01") > 0);
    assert (strverscmp ("0100", "01000") < 0);
    assert (strverscmp ("01000", "0100") > 0);
    assert (strverscmp ("1", "1!") < 0);
    assert (strverscmp ("1!", "1") > 0);
    assert (strverscmp ("1", "1z") < 0);
    assert (strverscmp ("1z", "1") > 0);

    /* Ordering 000 < 00 < 01 < 010 < 09 < 0 < 1 < 9 < 10 */
    {
        static const char *const ordered[] = {
            "000", "00", "01", "010", "09", "0", "1", "9", "10"
        };
        const size_t count = sizeof ordered / sizeof ordered[0];
        size_t lo;
        size_t hi;

        /* Every string must sort before each string listed after it */
        for (lo = 0; lo < count; lo++) {
            for (hi = lo + 1; hi < count; hi++) {
                assert (strverscmp (ordered[lo], ordered[hi]) < 0);
            }
        }
    }

    /* Compare speed */
    {
        /* Block-scoped constants; they do not leak like #define would */
        enum { LENGTH = 100, REPEAT = 1000000 };
        static const char letters[] =
            "01234567890123456789abdefghjkpqrtwxyz-/.";
        /* Number of usable characters, excluding the terminating zero */
        const size_t n = sizeof letters - 1;
        char a[LENGTH+1];
        char b[LENGTH+1];
        unsigned int i;
        size_t j;

        /* Repeat test */
        for (i = 0; i < REPEAT; i++) {
            /*
             * Generate a random string of LENGTH characters.  Beware that
             * the string has to be created inside the loop, or otherwise
             * the compiler can reduce the loop into a single call!
             */
            for (j = 0; j < LENGTH; j++) {
                char c = letters[(size_t) rand() % n];
                a[j] = c;
                b[j] = c;
            }
            a[j] = '\0';
            b[j] = '\0';

            /* Both strings must be equal */
            assert (strverscmp (a, b) == 0);
        }
    }

    printf ("OK\n");
    return EXIT_SUCCESS;
}