diff --git a/Makefile b/Makefile
index fc1f5a4..13d6604 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@ dut_donnabad: $(OBJS_DONNABAD) examples/donnabad/dut_donnabad.c
 
 dut_simple: examples/simple/example.c
 	# higher compiler optimization levels can make this constant time
-	$(CC) $(LDFLAGS) -O0 $(INCS) -o dudect_simple_O0 examples/simple/example.c $(LIBS)
+	$(CC) $(LDFLAGS) -O0 $(INCS) -DMEASUREMENTS_PER_CHUNK=100000 -o dudect_simple_O0 examples/simple/example.c $(LIBS)
 	$(CC) $(LDFLAGS) -O2 $(INCS) -DMEASUREMENTS_PER_CHUNK=100000 -o dudect_simple_O2 examples/simple/example.c $(LIBS)
 
 .c.o:
diff --git a/README.md b/README.md
index db6197d..67bbe95 100644
--- a/README.md
+++ b/README.md
@@ -195,6 +195,7 @@ The following people have contributed to `dudect` through code, bug reports, iss
 * RashidAlsuwaidi
 * paul90317
 * Fabian Albert (https://github.com/FAlbertDev)
+* Anjan Roy (https://github.com/itzmeanjan)
 
 The approach is described in this paper
 > Oscar Reparaz, Josep Balasch and Ingrid Verbauwhede
diff --git a/examples/simple/example.c b/examples/simple/example.c
index 0728143..3b89384 100644
--- a/examples/simple/example.c
+++ b/examples/simple/example.c
@@ -10,7 +10,7 @@ int check_tag(uint8_t *x, uint8_t *y, size_t len) {
   return memcmp(x, y, len);
 }
 
-#define SECRET_LEN_BYTES (16)
+#define SECRET_LEN_BYTES (512)
 
 uint8_t secret[SECRET_LEN_BYTES] = {0, 1, 2, 3, 4, 5, 6, 42};
diff --git a/src/dudect.h b/src/dudect.h
index b08bb57..ee40912 100644
--- a/src/dudect.h
+++ b/src/dudect.h
@@ -71,6 +71,8 @@ extern "C" {
 #include <stdlib.h>
 #include <string.h>
+#include <emmintrin.h>
+#include <x86intrin.h>
 
 #ifdef DUDECT_VISIBLITY_STATIC
 #define DUDECT_VISIBILITY static
@@ -202,7 +204,6 @@ static int cmp(const int64_t *a, const int64_t *b) { return (int)(*a - *b); }
 
 static int64_t percentile(int64_t *a_sorted, double which, size_t size) {
   size_t array_position = (size_t)((double)size * (double)which);
-  assert(array_position >= 0);
   assert(array_position < size);
   return a_sorted[array_position];
 }
@@ -262,16 +263,20 @@ uint8_t randombit(void) {
 }
 
 /*
-  Intel actually recommends calling CPUID to serialize the execution flow
-  and reduce variance in measurement due to out-of-order execution.
-  We don't do that here yet.
-  see §3.2.1 http://www.intel.com/content/www/us/en/embedded/training/ia-32-ia-64-benchmark-code-execution-paper.html
-*/
-static int64_t cpucycles(void) {
-  unsigned int hi, lo;
-
-  __asm__ volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
-  return ((int64_t)lo) | (((int64_t)hi) << 32);
+  Returns the current CPU tick count from the *T*ime *S*tamp *C*ounter.
+
+  To force the CPU to issue the RDTSC instruction exactly where we want it, we place an `mfence`
+  instruction before `rdtsc`, which should make all memory load/store operations prior to RDTSC globally visible.
+
+  See https://github.com/oreparaz/dudect/issues/32
+  See RDTSC documentation @ https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=rdtsc&ig_expand=4395,5273
+  See MFENCE documentation @ https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mfence&ig_expand=4395,5273,4395
+
+  Also see https://stackoverflow.com/a/12634857
+*/
+static inline int64_t cpucycles(void) {
+  _mm_mfence();
+  return (int64_t)__rdtsc();
 }
 
 // threshold values for Welch's t-test
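
A note on the `MEASUREMENTS_PER_CHUNK` change above: the `-O0` build now pins the same chunk size (100000 measurements per chunk) as the `-O2` build, so both binaries collect measurements in equally sized batches. A minimal sketch of the override pattern such a compile-time knob typically relies on; the fallback value of 100 here is a placeholder, not necessarily dudect's actual default:

```c
/* Sketch only: the build pins the chunk size with -DMEASUREMENTS_PER_CHUNK=<n>;
   otherwise a header-provided default applies. The value 100 below is
   hypothetical -- the real default lives in src/dudect.h. */
#ifndef MEASUREMENTS_PER_CHUNK
#define MEASUREMENTS_PER_CHUNK 100
#endif
```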
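On the `percentile()` hunk: `array_position` has type `size_t`, which is unsigned, so the removed `assert(array_position >= 0)` could never fail and only triggered tautological-comparison warnings. A self-contained sketch, with illustrative data and percentile value, showing that the remaining upper-bound assert is the only check that can fire:

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Same shape as dudect's percentile(): a_sorted is ascending, which is in [0, 1). */
static int64_t percentile(int64_t *a_sorted, double which, size_t size) {
  size_t array_position = (size_t)((double)size * (double)which);
  /* array_position is unsigned, so `array_position >= 0` is always true;
     only this upper bound is a meaningful check. */
  assert(array_position < size);
  return a_sorted[array_position];
}

int main(void) {
  int64_t ticks[5] = {10, 20, 30, 40, 50}; /* illustrative data */
  /* 0.5 * 5 = 2.5, truncated to index 2, so this prints 30 */
  printf("p50 = %lld\n", (long long)percentile(ticks, 0.5, 5));
  return 0;
}
```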
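On the `cpucycles()` rewrite: the `mfence` forces prior loads and stores to become globally visible before `rdtsc` samples the time stamp counter, trimming measurement variance caused by out-of-order execution. A minimal sketch of how the fenced counter can be exercised on its own, assuming an x86-64 GCC/Clang toolchain; the timed `memcmp` call and buffer size are illustrative only:

```c
#include <emmintrin.h> /* _mm_mfence */
#include <x86intrin.h> /* __rdtsc */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirrors the new cpucycles(): fence, then read the time stamp counter. */
static inline int64_t cpucycles(void) {
  _mm_mfence(); /* make prior loads/stores globally visible first */
  return (int64_t)__rdtsc();
}

int main(void) {
  uint8_t a[512] = {0}, b[512] = {0};
  int64_t start = cpucycles();
  volatile int r = memcmp(a, b, sizeof a); /* volatile keeps the call alive */
  int64_t end = cpucycles();
  printf("memcmp returned %d after %lld ticks\n", r, (long long)(end - start));
  return 0;
}
```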