From bf5f3193a78821283dfe90b45c5681d05a4319eb Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Wed, 7 Nov 2018 13:52:34 -0600 Subject: [PATCH 01/47] [feature] Add layout building block A layout is a more fundamental building block that was hidden inside tilings so far. This building block only describes the layout of a data structure, without any information about how to split it. See #21 for more explanations. --- src/Makefile.am | 5 +++-- src/aml-layout.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ src/aml.h | 3 ++- src/layout.c | 42 ++++++++++++++++++++++++++++++++++++ tests/Makefile.am | 3 ++- tests/layout.c | 34 +++++++++++++++++++++++++++++ 6 files changed, 137 insertions(+), 4 deletions(-) create mode 100644 src/aml-layout.h create mode 100644 src/layout.c create mode 100644 tests/layout.c diff --git a/src/Makefile.am b/src/Makefile.am index f8fca5cf..9eacb281 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -36,9 +36,10 @@ LIBCSOURCES = aml.c area.c arena.c \ $(TILING_CSOURCES) \ $(BINDING_CSOURCES) \ $(DMA_CSOURCES) \ - $(SCRATCH_CSOURCES) + $(SCRATCH_CSOURCES) \ + layout.c -LIBHSOURCES = aml.h +LIBHSOURCES = aml.h aml-layout.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml diff --git a/src/aml-layout.h b/src/aml-layout.h new file mode 100644 index 00000000..4681329c --- /dev/null +++ b/src/aml-layout.h @@ -0,0 +1,54 @@ +#ifndef AML_LAYOUT_H +#define AML_LAYOUT_H 1 + +/******************************************************************************* + * Data Layout Management: + ******************************************************************************/ + +struct aml_layout; + +/******************************************************************************* + * Generic layout, with support for sparsity and strides. 
+ ******************************************************************************/
+
+/* Layout: describes how a multi-dimensional data structure is collapsed into a
+ * linear (and contiguous) virtual address range.
+ * "ptr": base pointer of the address range
+ * "ndims": number of dimensions
+ * "dims": dimensions, in element size, of the data structure, by order of
+ *         appearance in memory.
+ * "pitch": cumulative distances between two elements in the same dimension
+ *          (pitch[0] is the element size in bytes).
+ * "stride": offset between elements of the same dimension.
+ */
+
+struct aml_layout {
+	void *ptr;
+	size_t ndims;
+	size_t *dims;
+	size_t *pitch;
+	size_t *stride;
+};
+
+/* Bytes needed for one struct aml_layout followed by three arrays of "ndims"
+ * size_t values (dims, pitch, stride).
+ * The argument is parenthesized so that an expression such as "n + 1" is
+ * multiplied as a whole.
+ */
+#define AML_LAYOUT_ALLOCSIZE(ndims) (sizeof(struct aml_layout) +\
+					(ndims) * 3 * sizeof(size_t))
+
+/* Declares a struct aml_layout called "name" plus the size_t array that its
+ * dims/pitch/stride members point into. "ptr" is set to NULL and the array
+ * contents are not initialized here.
+ */
+#define AML_LAYOUT_DECL(name, ndims) \
+	size_t __ ##name## _inner_data[(ndims) * 3]; \
+	struct aml_layout name = { \
+		NULL, \
+		(ndims), \
+		__ ##name## _inner_data, \
+		__ ##name## _inner_data + (ndims), \
+		__ ##name## _inner_data + 2 * (ndims), \
+	};
+
+int aml_layout_struct_init(struct aml_layout *l, size_t ndims, void *data);
+int aml_layout_init(struct aml_layout *l, void *ptr, size_t ndims,
+		    const size_t *dims, const size_t *pitch,
+		    const size_t *stride);
+int aml_layout_create(struct aml_layout **l, void *ptr, size_t ndims,
+		      const size_t *dims, const size_t *pitch,
+		      const size_t *stride);
+
+#endif
diff --git a/src/aml.h b/src/aml.h
index 73f23992..492ef518 100644
--- a/src/aml.h
+++ b/src/aml.h
@@ -1,6 +1,7 @@
 #ifndef AML_H
 #define AML_H 1
 
+#include
 #include
 #include
 #include
@@ -18,7 +19,7 @@
 #define PAGE_SIZE 4096
 #endif
 
-
+#include "aml-layout.h"
 /*******************************************************************************
  * Forward Declarations:
  ******************************************************************************/
diff --git a/src/layout.c b/src/layout.c
new file mode 100644
index 00000000..f9d870d3
--- /dev/null
+++ b/src/layout.c
@@ -0,0 +1,42 @@
+#include
+
+/* Points the dims, pitch and stride members of "p" at three consecutive
+ * runs of "ndims" size_t values starting at "data_ptr", and records "ndims".
+ * Does not touch p->ptr. Always returns 0.
+ */
+int aml_layout_struct_init(struct aml_layout *p,
+			   size_t ndims, void *data_ptr)
+{
+	p->ndims = ndims;
+	p->dims = (size_t *)data_ptr;
+	p->pitch = p->dims + ndims;
+	p->stride = p->pitch + ndims;
+	return 0;
+}
+
+/* Stores "ptr" in the layout and copies "ndims" entries of "dims", "pitch"
+ * and "stride" into the arrays already attached to "p".
+ * Asserts that p->ndims matches "ndims" and that the three arrays are set.
+ * Always returns 0.
+ */
+int aml_layout_init(struct aml_layout *p, void *ptr,
+		    size_t ndims, const size_t *dims,
+		    const size_t *pitch,
+		    const size_t *stride)
+{
+	assert(p->ndims == ndims);
+	assert(p->dims);
+	assert(p->pitch);
+	assert(p->stride);
+	p->ptr = ptr;
+	memcpy(p->dims, dims, ndims * sizeof(size_t));
+	memcpy(p->pitch, pitch, ndims * sizeof(size_t));
+	memcpy(p->stride, stride, ndims * sizeof(size_t));
+	return 0;
+}
+
+/* Allocates a layout and its three arrays in a single calloc() block, then
+ * initializes it from the arguments. The block is released with one free()
+ * on *p.
+ * Returns 0 on success. On allocation failure *p is set to NULL and -1 is
+ * returned.
+ */
+int aml_layout_create(struct aml_layout **p, void *ptr,
+		      size_t ndims, const size_t *dims,
+		      const size_t *pitch,
+		      const size_t *stride)
+{
+	assert(ndims > 0);
+	void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims));
+
+	/* calloc can fail: report it instead of dereferencing NULL below */
+	if (baseptr == NULL) {
+		*p = NULL;
+		return -1;
+	}
+	*p = (struct aml_layout *)baseptr;
+	/* the dims/pitch/stride storage starts right after the struct */
+	baseptr = (void *)((uintptr_t)baseptr +
+			   sizeof(struct aml_layout));
+	aml_layout_struct_init(*p, ndims, baseptr);
+	aml_layout_init(*p, ptr, ndims, dims, pitch, stride);
+	return 0;
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 7a053236..a864bea6 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -39,7 +39,8 @@ UNIT_TESTS = $(ARENA_JEMALLOC_TESTS) \
 	$(AREA_LINUX_TESTS) \
 	$(AREA_POSIX_TESTS) \
 	$(DMA_LINUX_TESTS) \
-	$(SCRATCH_TESTS)
+	$(SCRATCH_TESTS) \
+	layout
 
 # all tests
 TST_PROGS = $(UNIT_TESTS)
diff --git a/tests/layout.c b/tests/layout.c
new file mode 100644
index 00000000..d0a729f6
--- /dev/null
+++ b/tests/layout.c
@@ -0,0 +1,34 @@
+#include
+#include
+
+int main(int argc, char *argv[])
+{
+	struct aml_layout *a;
+	AML_LAYOUT_DECL(b, 5);
+
+	/* pad the dims to the closest multiple of 2 */
+	float memory[4][4][8][12][16];
+	size_t dims[5] = {2,3,7,11,13};
+	size_t pitch[5] = {4, 4*4, 4*4*4, 4*4*4*8, 4*4*4*8*12};
+	size_t stride[5] = {1,1,1,1,1};
+
+	/* library initialization */
+	aml_init(&argc, &argv);
+
+	/* initialize the layouts */
+	aml_layout_create(&a,
(void *)memory, 5, dims, pitch, stride); + aml_layout_init(&b, (void *)memory, 5, dims, pitch, stride); + + /* some simple checks */ + assert(!memcmp(a->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(a->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(a->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(b.dims, dims, sizeof(size_t)*5)); + assert(!memcmp(b.pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(b.stride, stride, sizeof(size_t)*5)); + + free(a); + + aml_finalize(); + return 0; +} From 6ee737bd4880a0a4d0615c7c275379d49b179bc6 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Wed, 7 Nov 2018 14:25:42 -0600 Subject: [PATCH 02/47] [feature] Add copy/transform utilities Layout-based copy and transform utils. These utilities will be merged with DMAs in the near future, to provide the ability of transforming layouts on the fly during data movement. --- src/Makefile.am | 4 +- src/aml-copy.h | 177 ++++++++ src/aml.h | 1 + src/copy.c | 533 ++++++++++++++++++++++++ tests/Makefile.am | 2 +- tests/copy.c | 1014 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1728 insertions(+), 3 deletions(-) create mode 100644 src/aml-copy.h create mode 100644 src/copy.c create mode 100644 tests/copy.c diff --git a/src/Makefile.am b/src/Makefile.am index 9eacb281..4d143709 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -37,9 +37,9 @@ LIBCSOURCES = aml.c area.c arena.c \ $(BINDING_CSOURCES) \ $(DMA_CSOURCES) \ $(SCRATCH_CSOURCES) \ - layout.c + layout.c copy.c -LIBHSOURCES = aml.h aml-layout.h +LIBHSOURCES = aml.h aml-layout.h aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml diff --git a/src/aml-copy.h b/src/aml-copy.h new file mode 100644 index 00000000..ecf4c6ff --- /dev/null +++ b/src/aml-copy.h @@ -0,0 +1,177 @@ +#ifndef AML_COPY_H +#define AML_COPY_H 1 + + /******************************************************************************* + * Hypervolume copy and transpose 
functions. + ******************************************************************************/ + +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume. + * "d": number of dimensions. + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume. + * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. + * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements. + * Returns 0 if successful; an error code otherwise. + */ +int aml_copy_nd(size_t d, void *dst, const size_t *dst_pitch, + const void *src, const size_t *src_pitch, + const size_t *elem_number, const size_t elem_size); +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume while transposing. + * Reverse of aml_copy_rtnd. + * Example a[3][4][5] -> b[5][3][4] (C notation). + * "d": number of dimensions. + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume. + * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. + * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements in the src hypervolume order. + * Returns 0 if successful; an error code otherwise. + */ +int aml_copy_tnd(size_t d, void *dst, const size_t *dst_pitch, + const void *src, const size_t *src_pitch, + const size_t *elem_number, const size_t elem_size); +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume while transposing. + * Reverse of aml_copy_tnd. 
+ * Example a[3][4][5] -> b[4][5][3] (C notation). + * "d": number of dimensions. + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume. + * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. + * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements in the src hypervolume order. + * Returns 0 if successful; an error code otherwise. + */ +int aml_copy_rtnd(size_t d, void *dst, const size_t *dst_pitch, + const void *src, const size_t *src_pitch, + const size_t *elem_number, const size_t elem_size); + +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume while shuffling + * dimensions. Example a[4][2][3][5] -> b[5][4][3][2] (C notation). + * "d": number of dimensions. + * "target_dims": array of d dimension index representing the mapping + * between the source dimensions and the target dimensions. + * Example [3, 1, 0, 2] + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume. + * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. + * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements in the src hypervolume order. + * Returns 0 if successful; an error code otherwise. 
+ */
+int aml_copy_shnd(size_t d, const size_t *target_dims, void *dst,
+		  const size_t *dst_pitch, const void *src,
+		  const size_t *src_pitch, const size_t *elem_number,
+		  const size_t elem_size);
+/*
+ * Strided version of aml_copy_nd.
+ */
+int aml_copy_ndstr(size_t d, void *dst, const size_t *dst_pitch,
+		   const size_t *dst_stride, const void *src,
+		   const size_t *src_pitch, const size_t *src_stride,
+		   const size_t *elem_number, const size_t elem_size);
+/*
+ * Strided version of aml_copy_tnd.
+ */
+int aml_copy_tndstr(size_t d, void *dst, const size_t *dst_pitch,
+		    const size_t *dst_stride, const void *src,
+		    const size_t *src_pitch, const size_t *src_stride,
+		    const size_t *elem_number, const size_t elem_size);
+/*
+ * Strided version of aml_copy_rtnd.
+ */
+int aml_copy_rtndstr(size_t d, void *dst, const size_t *dst_pitch,
+		     const size_t *dst_stride, const void *src,
+		     const size_t *src_pitch, const size_t *src_stride,
+		     const size_t *elem_number, const size_t elem_size);
+/*
+ * Strided version of aml_copy_shnd.
+ */
+int aml_copy_shndstr(size_t d, const size_t *target_dims, void *dst,
+		     const size_t *dst_pitch, const size_t *dst_stride,
+		     const void *src, const size_t *src_pitch,
+		     const size_t *src_stride, const size_t *elem_number,
+		     const size_t elem_size);
+/*
+ * Version of aml_copy_nd using cumulative pitch.
+ */
+int aml_copy_nd_c(size_t d, void *dst, const size_t *cumul_dst_pitch,
+		  const void *src, const size_t *cumul_src_pitch,
+		  const size_t *elem_number, const size_t elem_size);
+/*
+ * Version of aml_copy_ndstr using cumulative pitch.
+ */
+int aml_copy_ndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch,
+		     const size_t *dst_stride, const void *src,
+		     const size_t *cumul_src_pitch, const size_t *src_stride,
+		     const size_t *elem_number, const size_t elem_size);
+/*
+ * Version of aml_copy_tnd using cumulative pitch.
+ */
+int aml_copy_tnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch,
+		   const void *src, const size_t *cumul_src_pitch,
+		   const size_t *elem_number, const size_t elem_size);
+/*
+ * Version of aml_copy_rtnd using cumulative pitch.
+ */
+int aml_copy_rtnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch,
+		    const void *src, const size_t *cumul_src_pitch,
+		    const size_t *elem_number, const size_t elem_size);
+/*
+ * Version of aml_copy_shnd using cumulative pitch.
+ */
+int aml_copy_shnd_c(size_t d, const size_t *target_dims, void *dst,
+		    const size_t *cumul_dst_pitch, const void *src,
+		    const size_t *cumul_src_pitch, const size_t *elem_number,
+		    const size_t elem_size);
+/*
+ * Version of aml_copy_tndstr using cumulative pitch.
+ */
+int aml_copy_tndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch,
+		      const size_t *dst_stride, const void *src,
+		      const size_t *cumul_src_pitch, const size_t *src_stride,
+		      const size_t *elem_number, const size_t elem_size);
+/*
+ * Version of aml_copy_rtndstr using cumulative pitch.
+ */
+int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch,
+		       const size_t *dst_stride, const void *src,
+		       const size_t *cumul_src_pitch, const size_t *src_stride,
+		       const size_t *elem_number, const size_t elem_size);
+/*
+ * Version of aml_copy_shndstr using cumulative pitch.
+ */ +int aml_copy_shndstr_c(size_t d, const size_t *target_dims, void *dst, + const size_t *cumul_dst_pitch, const size_t *dst_stride, + const void *src, const size_t *cumul_src_pitch, + const size_t *src_stride, const size_t *elem_number, + const size_t elem_size); + +int aml_copy_layout(struct aml_layout *dst, const struct aml_layout *src); +int aml_transform_layout(struct aml_layout *dst, const struct aml_layout *src, + const size_t *target_dims); +int aml_transpose_layout(struct aml_layout *dst, const struct aml_layout *src); +int aml_reverse_transpose_layout(struct aml_layout *dst, + const struct aml_layout *src); + +#endif diff --git a/src/aml.h b/src/aml.h index 492ef518..5a1718bd 100644 --- a/src/aml.h +++ b/src/aml.h @@ -20,6 +20,7 @@ #endif #include "aml-layout.h" +#include "aml-copy.h" /******************************************************************************* * Forward Declarations: ******************************************************************************/ diff --git a/src/copy.c b/src/copy.c new file mode 100644 index 00000000..04204c45 --- /dev/null +++ b/src/copy.c @@ -0,0 +1,533 @@ +#include +#include +#include +#include +#include + +static inline void aml_compute_cumulative_pitch(size_t d, + size_t *cumul_dst_pitch, + size_t *cumul_src_pitch, + const size_t *dst_pitch, + const size_t *src_pitch, + size_t elem_size) +{ + cumul_dst_pitch[0] = elem_size; + cumul_src_pitch[0] = elem_size; + for (int i = 0; i < d - 1; i++) { + cumul_dst_pitch[i + 1] = dst_pitch[i] * cumul_dst_pitch[i]; + cumul_src_pitch[i + 1] = src_pitch[i] * cumul_src_pitch[i]; + } +} + +static inline void aml_copy_2d_helper(void *dst, const size_t *cumul_dst_pitch, + const void *src, + const size_t *cumul_src_pitch, + const size_t *elem_number, + size_t elem_size) +{ + if (cumul_dst_pitch[0] == elem_size && cumul_src_pitch[0] == elem_size) + for (int i = 0; i < elem_number[1]; i++) { + memcpy(dst, src, elem_number[0] * elem_size); + dst = (void *)((uintptr_t) dst + 
cumul_dst_pitch[1]); + src = (void *)((uintptr_t) src + cumul_src_pitch[1]); + } + else + for (int j = 0; j < elem_number[1]; j++) + for (int i = 0; i < elem_number[0]; i++) + memcpy((void *)((uintptr_t) dst + + i * cumul_dst_pitch[0] + + j * cumul_dst_pitch[1]), + (void *)((uintptr_t) src + + i * cumul_src_pitch[0] + + j * cumul_src_pitch[1]), + elem_size); +} + +static void aml_copy_nd_helper(size_t d, void *dst, + const size_t *cumul_dst_pitch, const void *src, + const size_t *cumul_src_pitch, + const size_t *elem_number, + const size_t elem_size) +{ + if (d == 1) + if (cumul_dst_pitch[0] == elem_size && + cumul_src_pitch[0] == elem_size) + memcpy(dst, src, elem_number[0] * elem_size); + else + for (int i = 0; i < elem_number[0]; i++) + memcpy((void *)((uintptr_t) dst + + i * cumul_dst_pitch[0]), + (void *)((uintptr_t) src + + i * cumul_src_pitch[0]), + elem_size); + else if (d == 2) + aml_copy_2d_helper(dst, cumul_dst_pitch, src, cumul_src_pitch, + elem_number, elem_size); + else { + for (int i = 0; i < elem_number[d - 1]; i++) { + aml_copy_nd_helper(d - 1, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, + elem_size); + dst = + (void *)((uintptr_t) dst + cumul_dst_pitch[d - 1]); + src = + (void *)((uintptr_t) src + cumul_src_pitch[d - 1]); + } + } +} + +int aml_copy_nd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const void *src, const size_t *cumul_src_pitch, + const size_t *elem_number, size_t elem_size) +{ + assert(d > 0); + for (int i = 0; i < d - 1; i++) { + assert(cumul_dst_pitch[i + 1] >= cumul_dst_pitch[i] * + elem_number[i]); + assert(cumul_src_pitch[i + 1] >= cumul_src_pitch[i] * + elem_number[i]); + } + aml_copy_nd_helper(d, dst, cumul_dst_pitch, src, cumul_src_pitch, + elem_number, elem_size); + return 0; +} + +int aml_copy_nd(size_t d, void *dst, const size_t *dst_pitch, const void *src, + const size_t *src_pitch, const size_t *elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t *cumul_dst_pitch = (size_t *) alloca(d 
* sizeof(size_t)); + size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); + + aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, elem_size); + aml_copy_nd_c(d, dst, cumul_dst_pitch, src, cumul_src_pitch, + elem_number, elem_size); + return 0; +} + +static void aml_copy_ndstr_helper(size_t d, void *dst, + const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, + const size_t *src_stride, + const size_t *elem_number, size_t elem_size) +{ + if (d == 1) + for (int i = 0; i < elem_number[0]; i++) + memcpy((void *)((uintptr_t) dst + + i * dst_stride[0] * cumul_dst_pitch[0]), + (void *)((uintptr_t) src + + i * src_stride[0] * cumul_src_pitch[0]), + elem_size); + else { + for (int i = 0; i < elem_number[d - 1]; i++) { + aml_copy_ndstr_helper(d - 1, dst, cumul_dst_pitch, + dst_stride, src, cumul_src_pitch, + src_stride, elem_number, + elem_size); + dst = + (void *)((uintptr_t) dst + + cumul_dst_pitch[d - 1] * dst_stride[d - + 1]); + src = + (void *)((uintptr_t) src + + cumul_src_pitch[d - 1] * src_stride[d - + 1]); + } + } +} + +int aml_copy_ndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size) +{ + assert(d > 0); + for (int i = 0; i < d - 1; i++) { + assert(cumul_dst_pitch[i + 1] >= + cumul_dst_pitch[i] * elem_number[i] * + dst_stride[i]); + assert(cumul_src_pitch[i + 1] >= + cumul_src_pitch[i] * elem_number[i] * + src_stride[i]); + } + aml_copy_ndstr_helper(d, dst, cumul_dst_pitch, dst_stride, src, + cumul_src_pitch, src_stride, elem_number, + elem_size); + return 0; +} + +int aml_copy_ndstr(size_t d, void *dst, const size_t *dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size) +{ + assert(d > 0); + 
size_t *cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t));
+	size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t));
+
+	aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch,
+				     dst_pitch, src_pitch, elem_size);
+	aml_copy_ndstr_c(d, dst, cumul_dst_pitch, dst_stride, src,
+			 cumul_src_pitch, src_stride, elem_number,
+			 elem_size);
+	return 0;
+}
+
+/* Copies elem_number[0] x elem_number[1] elements one at a time.
+ * Source dimensions 0 and 1 are written along destination dimensions
+ * target_dims[0] and target_dims[1]; offsets are taken from the cumulative
+ * pitch arrays.
+ */
+static void aml_copy_sh2d_helper(const size_t *target_dims, void *dst,
+				 const size_t *cumul_dst_pitch,
+				 const void *src,
+				 const size_t *cumul_src_pitch,
+				 const size_t *elem_number,
+				 const size_t elem_size)
+{
+	for (int j = 0; j < elem_number[1]; j++)
+		for (int i = 0; i < elem_number[0]; i++)
+			memcpy((void *)((uintptr_t) dst +
+					i * cumul_dst_pitch[target_dims[0]] +
+					j * cumul_dst_pitch[target_dims[1]]),
+			       (void *)((uintptr_t) src +
+					i * cumul_src_pitch[0] +
+					j * cumul_src_pitch[1]), elem_size);
+}
+
+/* Recursive worker for the dimension-shuffling copy.
+ * Source dimension k is written along destination dimension target_dims[k].
+ * d == 1 and d == 2 are the base cases; larger d walks dimension d-1 and
+ * recurses on the remaining d-1 dimensions.
+ * The three cases are mutually exclusive: without the "else" before the
+ * d == 2 test, a d == 1 call would also enter the recursive branch and
+ * recurse with d == 0.
+ */
+static void aml_copy_shnd_helper(size_t d, const size_t *target_dims,
+				 void *dst, const size_t *cumul_dst_pitch,
+				 const void *src,
+				 const size_t *cumul_src_pitch,
+				 const size_t *elem_number,
+				 const size_t elem_size)
+{
+	if (d == 1) {
+		for (int i = 0; i < elem_number[0]; i++)
+			memcpy((void *)((uintptr_t) dst +
+					i * cumul_dst_pitch[target_dims[0]]),
+			       (void *)((uintptr_t) src +
+					i * cumul_src_pitch[0]), elem_size);
+	} else if (d == 2) {
+		aml_copy_sh2d_helper(target_dims, dst, cumul_dst_pitch, src,
+				     cumul_src_pitch, elem_number, elem_size);
+	} else {
+		// process dimension d-1
+		for (int i = 0; i < elem_number[d - 1]; i++) {
+			aml_copy_shnd_helper(d - 1, target_dims, dst,
+					     cumul_dst_pitch, src,
+					     cumul_src_pitch, elem_number,
+					     elem_size);
+			dst =
+			    (void *)((uintptr_t) dst +
+				     cumul_dst_pitch[target_dims[d - 1]]);
+			src =
+			    (void *)((uintptr_t) src + cumul_src_pitch[d - 1]);
+		}
+	}
+}
+
+int aml_copy_shnd_c(size_t d, const size_t *target_dims, void *dst,
+		    const size_t *cumul_dst_pitch, const void *src,
+		    const size_t *cumul_src_pitch, const size_t *elem_number,
+		    const size_t elem_size)
+{
+	/* Checks the arguments with assert(), then runs the shuffling copy.
+	 * Always returns 0.
+	 */
+	assert(d > 0);
+	/* one bit per destination dimension, so d must fit in the mask */
+	assert(d <= sizeof(size_t) * 8);
+	size_t present_dims = 0;
+
+	for (int i = 0; i < d; i++) {
+		assert(target_dims[i] < d);
+		/* the next destination pitch must cover this dimension */
+		if (target_dims[i] < d - 1)
+			assert(cumul_dst_pitch[target_dims[i] + 1] >=
+			       cumul_dst_pitch[target_dims[i]] *
+			       elem_number[i]);
+		/* shift a size_t, not an int: the mask is size_t wide */
+		present_dims |= (size_t)1 << target_dims[i];
+	}
+	/* every destination dimension must be targeted */
+	for (int i = 0; i < d; i++)
+		assert(present_dims & ((size_t)1 << i));
+	for (int i = 0; i < d - 1; i++)
+		assert(cumul_src_pitch[i + 1] >= cumul_src_pitch[i] *
+		       elem_number[i]);
+	aml_copy_shnd_helper(d, target_dims, dst, cumul_dst_pitch, src,
+			     cumul_src_pitch, elem_number, elem_size);
+	return 0;
+}
+
+/* Same as aml_copy_shnd_c, but takes per-dimension pitches and converts
+ * them to cumulative pitches first. Always returns 0.
+ */
+int aml_copy_shnd(size_t d, const size_t *target_dims, void *dst,
+		  const size_t *dst_pitch, const void *src,
+		  const size_t *src_pitch, const size_t *elem_number,
+		  const size_t elem_size)
+{
+	assert(d > 0);
+	size_t *cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t));
+	size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t));
+
+	aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch,
+				     dst_pitch, src_pitch, elem_size);
+	aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src,
+			cumul_src_pitch, elem_number, elem_size);
+	return 0;
+}
+
+/* Transposing copy: source dimension 0 goes to destination dimension d-1,
+ * and every other source dimension i goes to destination dimension i-1.
+ * Always returns 0.
+ */
+int aml_copy_tnd(size_t d, void *dst, const size_t *dst_pitch, const void *src,
+		 const size_t *src_pitch, const size_t *elem_number,
+		 const size_t elem_size)
+{
+	assert(d > 0);
+	size_t *target_dims = (size_t *) alloca(d * sizeof(size_t));
+
+	target_dims[0] = d - 1;
+	for (int i = 1; i < d; i++)
+		target_dims[i] = i - 1;
+	aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch,
+		      elem_number, elem_size);
+	return 0;
+}
+
+int aml_copy_tnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch,
+		   const void *src, const size_t *cumul_src_pitch,
+		   const size_t *elem_number, const size_t elem_size)
+{
+	assert(d > 0);
+	size_t *target_dims = (size_t *) alloca(d * sizeof(size_t));
+
+	target_dims[0] = d - 1;
+	for (int i = 1; i < d; i++)
+		target_dims[i] = i - 1;
+	aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src,
+ cumul_src_pitch, elem_number, elem_size); + return 0; +} + +int aml_copy_rtnd(size_t d, void *dst, const size_t *dst_pitch, + const void *src, const size_t *src_pitch, + const size_t *elem_number, const size_t elem_size) +{ + assert(d > 0); + size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; + aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, + elem_number, elem_size); + return 0; +} + +int aml_copy_rtnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const void *src, const size_t *cumul_src_pitch, + const size_t *elem_number, const size_t elem_size) +{ + assert(d > 0); + size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; + aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, elem_size); + return 0; +} + +static void aml_copy_shndstr_helper(size_t d, const size_t *target_dims, + void *dst, const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, + const size_t *src_stride, + const size_t *elem_number, + const size_t elem_size) +{ + if (d == 1) + for (int i = 0; i < elem_number[0]; i++) + memcpy((void *)((uintptr_t) dst + + i * cumul_dst_pitch[target_dims[0]] * + dst_stride[target_dims[0]]), + (void *)((uintptr_t) src + + i * cumul_src_pitch[0] * src_stride[0]), + elem_size); + else { + // process dimension d-1 + for (int i = 0; i < elem_number[d - 1]; i++) { + aml_copy_shndstr_helper(d - 1, target_dims, dst, + cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, + src_stride, elem_number, + elem_size); + dst = + (void *)((uintptr_t) dst + + cumul_dst_pitch[target_dims[d - 1]] * + dst_stride[target_dims[d - 1]]); + src = + (void *)((uintptr_t) src + + cumul_src_pitch[d - 1] * src_stride[d - + 1]); + } + } +} + +int aml_copy_shndstr_c(size_t d, const size_t 
*target_dims, void *dst,
+		       const size_t *cumul_dst_pitch, const size_t *dst_stride,
+		       const void *src, const size_t *cumul_src_pitch,
+		       const size_t *src_stride, const size_t *elem_number,
+		       const size_t elem_size)
+{
+	/* Checks the arguments with assert(), then runs the strided
+	 * shuffling copy. Always returns 0.
+	 */
+	assert(d > 0);
+	/* one bit per destination dimension, so d must fit in the mask */
+	assert(d <= sizeof(size_t) * 8);
+	size_t present_dims = 0;
+
+	for (int i = 0; i < d; i++) {
+		assert(target_dims[i] < d);
+		/* the next destination pitch must cover this dimension */
+		if (target_dims[i] < d - 1)
+			assert(cumul_dst_pitch[target_dims[i] + 1] >=
+			       cumul_dst_pitch[target_dims[i]] *
+			       elem_number[i] *
+			       dst_stride[target_dims[i]]);
+		/* shift a size_t, not an int: the mask is size_t wide */
+		present_dims |= (size_t)1 << target_dims[i];
+	}
+	/* every destination dimension must be targeted */
+	for (int i = 0; i < d; i++)
+		assert(present_dims & ((size_t)1 << i));
+	for (int i = 0; i < d - 1; i++)
+		assert(cumul_src_pitch[i + 1] >= cumul_src_pitch[i] *
+		       elem_number[i] *
+		       src_stride[i]);
+	aml_copy_shndstr_helper(d, target_dims, dst, cumul_dst_pitch,
+				dst_stride, src, cumul_src_pitch,
+				src_stride, elem_number, elem_size);
+	return 0;
+}
+
+/* Same as aml_copy_shndstr_c, but takes per-dimension pitches and converts
+ * them to cumulative pitches first. Always returns 0.
+ */
+int aml_copy_shndstr(size_t d, const size_t *target_dims, void *dst,
+		     const size_t *dst_pitch, const size_t *dst_stride,
+		     const void *src, const size_t *src_pitch,
+		     const size_t *src_stride, const size_t *elem_number,
+		     const size_t elem_size)
+{
+	assert(d > 0);
+	size_t *cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t));
+	size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t));
+
+	aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch,
+				     dst_pitch, src_pitch, elem_size);
+	aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch,
+			   dst_stride, src, cumul_src_pitch,
+			   src_stride, elem_number, elem_size);
+	return 0;
+}
+
+int aml_copy_tndstr(size_t d, void *dst, const size_t *dst_pitch,
+		    const size_t *dst_stride, const void *src,
+		    const size_t *src_pitch, const size_t *src_stride,
+		    const size_t *elem_number, const size_t elem_size)
+{
+	assert(d > 0);
+	size_t *target_dims = (size_t *) alloca(d * sizeof(size_t));
+
+	target_dims[0] = d - 1;
+	for (int i = 1; i < d; i++)
+		target_dims[i] = i - 1;
+	aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src,
src_pitch, src_stride, elem_number, elem_size); + return 0; +} + +int aml_copy_tndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size) +{ + assert(d > 0); + size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + + target_dims[0] = d - 1; + for (int i = 1; i < d; i++) + target_dims[i] = i - 1; + aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, src_stride, elem_number, + elem_size); + return 0; +} + +int aml_copy_rtndstr(size_t d, void *dst, const size_t *dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size) +{ + assert(d > 0); + size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; + aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, + src_pitch, src_stride, elem_number, elem_size); + return 0; +} + +int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size) +{ + assert(d > 0); + size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; + aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, src_stride, elem_number, + elem_size); + return 0; +} + +int aml_copy_layout(struct aml_layout *dst, const struct aml_layout *src) +{ + size_t d = src->ndims; + assert(d > 0); + + size_t elem_size = src->pitch[0]; + assert(d == dst->ndims); + assert(elem_size == dst->pitch[0]); + for (int i = 0; i < d; i++) + assert( src->dims[i] == dst->dims[i] ); + return 
aml_copy_ndstr_c(d, dst->ptr, dst->pitch, dst->stride, src->ptr, + src->pitch, src->stride, src->dims, elem_size); +} + +int aml_transform_layout(struct aml_layout *dst, const struct aml_layout *src, + const size_t *target_dims) +{ + size_t d = src->ndims; + assert(d > 0); + + size_t elem_size = src->pitch[0]; + assert(d == dst->ndims); + assert(elem_size == dst->pitch[0]); + for (int i = 0; i < d; i++) + assert( src->dims[i] == dst->dims[target_dims[i]]); + return aml_copy_shndstr_c(d, target_dims, dst->ptr, dst->pitch, + dst->stride, src->ptr, src->pitch, + src->stride, src->dims, elem_size); +} + +int aml_transpose_layout(struct aml_layout *dst, const struct aml_layout *src) +{ + size_t d = src->ndims; + assert(d > 0); + + size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (int i = 1; i < d; i++) + target_dims[i] = i - 1; + return aml_transform_layout(dst, src, target_dims); +} + +int aml_reverse_transpose_layout(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d = src->ndims; + assert(d > 0); + + size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; + return aml_transform_layout(dst, src, target_dims); +} diff --git a/tests/Makefile.am b/tests/Makefile.am index a864bea6..16d34b70 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -40,7 +40,7 @@ UNIT_TESTS = $(ARENA_JEMALLOC_TESTS) \ $(AREA_POSIX_TESTS) \ $(DMA_LINUX_TESTS) \ $(SCRATCH_TESTS) \ - layout + layout copy # all tests TST_PROGS = $(UNIT_TESTS) diff --git a/tests/copy.c b/tests/copy.c new file mode 100644 index 00000000..ffa12971 --- /dev/null +++ b/tests/copy.c @@ -0,0 +1,1014 @@ +#include +#include + +void test_copy_2d(void) +{ + size_t elem_number[2] = { 5, 3 }; + size_t src_pitch[2] = { 10, 6 }; + size_t dst_pitch[2] = { 5, 3 }; + + double src[6][10]; + double dst[3][5]; + double dst2[6][10]; + + double ref_dst2[6][10]; + double 
ref_dst[3][5]; + + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[j][i] = (double)(i + j * 10); + ref_dst2[j][i] = 0.0; + dst2[j][i] = 0.0; + } + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[j][i] = 0.0; + ref_dst[j][i] = src[j][i]; + ref_dst2[j][i] = src[j][i]; + } + + aml_copy_nd(2, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[j][i] == dst[j][i]); + + aml_copy_nd(2, dst2, src_pitch, dst, dst_pitch, elem_number, + sizeof(double)); + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[j][i] == dst2[j][i]); + +} + +void test_copy_t2d(void) +{ + size_t elem_number[2] = { 5, 3 }; + size_t elem_number2[2] = { 3, 5 }; + size_t src_pitch[2] = { 10, 6 }; + size_t dst_pitch[2] = { 3, 5 }; + + double src[6][10]; + double dst[5][3]; + double dst2[6][10]; + + double ref_dst2[6][10]; + double ref_dst[5][3]; + + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[j][i] = (double)(i + j * 10); + ref_dst2[j][i] = 0.0; + dst2[j][i] = 0.0; + } + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][j] = 0.0; + ref_dst[i][j] = src[j][i]; + ref_dst2[j][i] = src[j][i]; + } + + aml_copy_tnd(2, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][j] == dst[i][j]); + + aml_copy_tnd(2, dst2, src_pitch, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[j][i] == dst2[j][i]); + +} + +void test_copy_3d(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t dst_pitch[3] = { 5, 3, 2 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; 
i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_nd(3, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_nd(3, dst2, src_pitch, dst, dst_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + +void test_copy_3d_c(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[4] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4 }; + size_t c_dst_pitch[4] = { 8, 8 * 5, 8 * 5 * 3, 8 * 5 * 3 * 2 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_nd_c(3, dst, c_dst_pitch, src, c_src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_nd_c(3, dst2, c_src_pitch, dst, c_dst_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + +void test_copy_3dstr(void) +{ + size_t elem_number[3] = { 5, 3, 
2 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 2, 2, 2 }; + size_t dst_pitch[3] = { 5, 3, 2 }; + size_t dst_stride[3] = { 1, 1, 1 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[2 * k][2 * j][2 * i]; + ref_dst2[2 * k][2 * j][2 * i] = + src[2 * k][2 * j][2 * i]; + } + + aml_copy_ndstr(3, dst, dst_pitch, dst_stride, src, src_pitch, + src_stride, elem_number, sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_ndstr(3, dst2, src_pitch, src_stride, dst, dst_pitch, + dst_stride, elem_number, sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); +} + +void test_copy_3dstr_c(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[4] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4 }; + size_t src_stride[3] = { 2, 2, 2 }; + size_t c_dst_pitch[4] = { 8, 8 * 5, 8 * 5 * 3, 8 * 5 * 3 * 2 }; + size_t dst_stride[3] = { 1, 1, 1 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[2 * k][2 * j][2 * i]; + 
ref_dst2[2 * k][2 * j][2 * i] = + src[2 * k][2 * j][2 * i]; + } + + aml_copy_ndstr_c(3, dst, c_dst_pitch, dst_stride, src, c_src_pitch, + src_stride, elem_number, sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_ndstr_c(3, dst2, c_src_pitch, src_stride, dst, c_dst_pitch, + dst_stride, elem_number, sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); +} + +void test_copy_t3d(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t elem_number2[3] = { 3, 2, 5 }; + size_t elem_number3[3] = { 2, 5, 3 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t dst_pitch[3] = { 3, 2, 5 }; + size_t dst_pitch2[3] = { 2, 5, 3 }; + + double src[4][6][10]; + double dst[5][2][3]; + double dst2[3][5][2]; + double dst3[4][6][10]; + + double ref_dst[5][2][3]; + double ref_dst2[3][5][2]; + double ref_dst3[4][6][10]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst3[k][j][i] = 0.0; + dst3[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][k][j] = 0.0; + dst2[j][i][k] = 0.0; + ref_dst[i][k][j] = src[k][j][i]; + ref_dst2[j][i][k] = src[k][j][i]; + ref_dst3[k][j][i] = src[k][j][i]; + } + + aml_copy_tnd(3, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][k][j] == dst[i][k][j]); + + aml_copy_tnd(3, dst2, dst_pitch2, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst2[j][i][k] == dst2[j][i][k]); + + aml_copy_tnd(3, dst3, src_pitch, dst2, dst_pitch2, elem_number3, + sizeof(double)); + 
for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst3[k][j][i] == dst3[k][j][i]); +} + +void test_copy_rt3d(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t elem_number2[3] = { 2, 5, 3 }; + size_t elem_number3[3] = { 3, 2, 5 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t dst_pitch[3] = { 2, 5, 3 }; + size_t dst_pitch2[3] = { 3, 2, 5 }; + + double src[4][6][10]; + double dst[3][5][2]; + double dst2[5][2][3]; + double dst3[4][6][10]; + + double ref_dst[3][5][2]; + double ref_dst2[5][2][3]; + double ref_dst3[4][6][10]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst3[k][j][i] = 0.0; + dst3[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[j][i][k] = 0.0; + dst2[i][k][j] = 0.0; + ref_dst[j][i][k] = src[k][j][i]; + ref_dst2[i][k][j] = src[k][j][i]; + ref_dst3[k][j][i] = src[k][j][i]; + } + + aml_copy_rtnd(3, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[j][i][k] == dst[j][i][k]); + + aml_copy_rtnd(3, dst2, dst_pitch2, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst2[i][k][j] == dst2[i][k][j]); + + aml_copy_rtnd(3, dst3, src_pitch, dst2, dst_pitch2, elem_number3, + sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst3[k][j][i] == dst3[k][j][i]); +} + +void test_copy_t4d(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t src_pitch[4] = { 10, 6, 4, 8 }; + size_t dst_pitch[4] = { 3, 2, 4, 5 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double 
ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = src[l][k][j][i]; + } + + aml_copy_tnd(4, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtnd(4, dst2, src_pitch, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_t4d_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t c_dst_pitch[5] = { 8, 8 * 3, 8 * 3 * 2, 8 * 3 * 2 * 4, + 8 * 3 * 2 * 4 * 5 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = 
src[l][k][j][i]; + } + + aml_copy_tnd_c(4, dst, c_dst_pitch, src, c_src_pitch, elem_number, + sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtnd_c(4, dst2, c_src_pitch, dst, c_dst_pitch, elem_number2, + sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_t4dstr(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t dst_pitch[4] = { 3, 2, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_tndstr(4, dst, dst_pitch, dst_stride, src, src_pitch, + src_stride, elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtndstr(4, dst2, src_pitch, src_stride, dst, dst_pitch, + dst_stride, elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int 
j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_t4dstr_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[5] = { 8, 8 * 3, 8 * 3 * 2, 8 * 3 * 2 * 4, + 8 * 3 * 2 * 4 * 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_tndstr_c(4, dst, c_dst_pitch, dst_stride, src, c_src_pitch, + src_stride, elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtndstr_c(4, dst2, c_src_pitch, src_stride, dst, c_dst_pitch, + dst_stride, elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4d(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 3, 1, 0, 2 }; + size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t src_pitch[4] = 
{ 10, 6, 4, 8 }; + size_t dst_pitch[4] = { 2, 3, 4, 5 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = src[l][k][j][i]; + } + + aml_copy_shnd(4, target_dims, dst, dst_pitch, src, src_pitch, + elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + dst[i][l][j][k]); + + aml_copy_shnd(4, target_dims2, dst2, src_pitch, dst, dst_pitch, + elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4d_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 3, 1, 0, 2 }; + size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t c_dst_pitch[5] = { 8, 8 * 2, 8 * 2 * 3, 8 * 2 * 3 * 4, + 8 * 2 * 3 * 4 * 5 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + 
ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = src[l][k][j][i]; + } + + aml_copy_shnd_c(4, target_dims, dst, c_dst_pitch, src, c_src_pitch, + elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + dst[i][l][j][k]); + + aml_copy_shnd_c(4, target_dims2, dst2, c_src_pitch, dst, c_dst_pitch, + elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4dstr(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 3, 1, 0, 2 }; + size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t dst_pitch[4] = { 2, 3, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_shndstr(4, target_dims, dst, dst_pitch, dst_stride, src, + 
src_pitch, src_stride, elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + dst[i][l][j][k]); + + aml_copy_shndstr(4, target_dims2, dst2, src_pitch, src_stride, dst, + dst_pitch, dst_stride, elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4dstr_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 3, 1, 0, 2 }; + size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[5] = { 8, 8 * 2, 8 * 2 * 3, 8 * 2 * 3 * 4, + 8 * 2 * 3 * 4 * 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_shndstr_c(4, target_dims, dst, c_dst_pitch, dst_stride, src, + c_src_pitch, src_stride, elem_number, + sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + 
dst[i][l][j][k]); + + aml_copy_shndstr_c(4, target_dims2, dst2, c_src_pitch, src_stride, dst, + c_dst_pitch, dst_stride, elem_number2, + sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_layout(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[4] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4 }; + size_t src_stride[3] = { 1, 1, 1}; + size_t c_dst_pitch[4] = { 8, 8 * 5, 8 * 5 * 3, 8 * 5 * 3 * 2 }; + size_t dst_stride[3] = { 1, 1, 1}; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + struct aml_layout src_layout = { + (void *)src, + 3, + elem_number, + c_src_pitch, + src_stride + }; + struct aml_layout dst_layout = { + (void *)dst, + 3, + elem_number, + c_dst_pitch, + dst_stride + }; + struct aml_layout dst2_layout = { + (void *)dst2, + 3, + elem_number, + c_src_pitch, + src_stride + }; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_layout(&dst_layout, &src_layout); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_layout(&dst2_layout, &dst_layout); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + +void test_transpose_layout(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 
8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[5] = { 8, 8 * 3, 8 * 3 * 2, 8 * 3 * 2 * 4, + 8 * 3 * 2 * 4 * 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + struct aml_layout src_layout = { + (void *)src, + 4, + elem_number, + c_src_pitch, + src_stride + }; + struct aml_layout dst_layout = { + (void *)dst, + 4, + elem_number2, + c_dst_pitch, + dst_stride + }; + struct aml_layout dst2_layout = { + (void *)dst2, + 4, + elem_number, + c_src_pitch, + src_stride + }; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_transpose_layout(&dst_layout, &src_layout); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_reverse_transpose_layout(&dst2_layout, &dst_layout); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +int main(int argc, char *argv[]) +{ + test_copy_2d(); + test_copy_t2d(); + test_copy_3d(); + test_copy_3d_c(); + test_copy_3dstr(); + test_copy_3dstr_c(); + test_copy_t3d(); + test_copy_rt3d(); + test_copy_t4d(); + test_copy_t4d_c(); + test_copy_t4dstr(); + test_copy_t4dstr_c(); + test_copy_sh4d(); + 
test_copy_sh4d_c(); + test_copy_sh4dstr(); + test_copy_sh4dstr_c(); + test_copy_layout(); + test_transpose_layout(); + return 0; +} From a821c11432951d70e78e6084ac7cbdfe7d34125f Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Fri, 7 Dec 2018 15:14:45 -0600 Subject: [PATCH 03/47] [wip/feature] add new layout design Missing implementation of the operators, but good enough to see where we're going. --- src/Makefile.am | 6 +- src/aml-layout.h | 75 ++++++++++++++++--- src/layout.c | 176 ++++++++++++++++++++++++++++++++++++++------- src/layout_dense.c | 77 ++++++++++++++++++++ 4 files changed, 297 insertions(+), 37 deletions(-) create mode 100644 src/layout_dense.c diff --git a/src/Makefile.am b/src/Makefile.am index 4d143709..f498a625 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -10,6 +10,9 @@ AREA_LINUX_CSOURCES = area_linux.c \ AREA_POSIX_CSOURCES = area_posix.c +LAYOUT_CSOURCES = layout.c \ + layout_dense.c + TILING_CSOURCES = tiling.c \ tiling_1d.c \ tiling_2d.c @@ -37,7 +40,8 @@ LIBCSOURCES = aml.c area.c arena.c \ $(BINDING_CSOURCES) \ $(DMA_CSOURCES) \ $(SCRATCH_CSOURCES) \ - layout.c copy.c + $(LAYOUT_CSOURCES) \ + copy.c LIBHSOURCES = aml.h aml-layout.h aml-copy.h diff --git a/src/aml-layout.h b/src/aml-layout.h index 4681329c..e88a8cec 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -6,6 +6,7 @@ ******************************************************************************/ struct aml_layout; +struct aml_layout_data; /******************************************************************************* * Generic layout, with support for sparsity and strides. @@ -22,7 +23,10 @@ struct aml_layout; * "stride": offset between elements of the same dimension. 
*/ -struct aml_layout { +#define AML_TYPE_LAYOUT_COLUMN_ORDER 0 +#define AML_TYPE_LAYOUT_ROW_ORDER 1 + +struct aml_layout_data { void *ptr; size_t ndims; size_t *dims; @@ -30,25 +34,78 @@ struct aml_layout { size_t *stride; }; +struct aml_layout_ops { + void *(*deref)(const struct aml_layout_data *, va_list coords); + void *(*aderef)(const struct aml_layout_data *, const size_t *coords); + int (*order)(const struct aml_layout_data *); + int (*dims)(const struct aml_layout_data *, va_list dim_ptrs); + int (*adims)(const struct aml_layout_data *, const size_t *dims); +}; + +struct aml_layout { + uint64_t tags; + struct aml_layout_ops *ops; + struct aml_layout_data *data; +}; + #define AML_LAYOUT_ALLOCSIZE(ndims) (sizeof(struct aml_layout) +\ - ndims * 3 * sizeof(size_t)) + sizeof(struct aml_layout_data) +\ + ndims * 3 * sizeof(size_t)) #define AML_LAYOUT_DECL(name, ndims) \ size_t __ ##name## _inner_data[ndims * 3]; \ - struct aml_layout name = { \ + struct aml_layout_data __ ##name## _inner_struct = { \ NULL, \ ndims, \ __ ##name## _inner_data, \ __ ##name## _inner_data + ndims, \ __ ##name## _inner_data + 2 * ndims, \ + }; \ + struct aml_layout name = { \ + 0, \ + NULL, \ + & __ ##name## _inner_struct, \ }; int aml_layout_struct_init(struct aml_layout *l, size_t ndims, void *data); -int aml_layout_init(struct aml_layout *l, void *ptr, size_t ndims, - const size_t *dims, const size_t *pitch, - const size_t *stride); -int aml_layout_create(struct aml_layout **l, void *ptr, size_t ndims, - const size_t *dims, const size_t *pitch, - const size_t *stride); +int aml_layout_ainit(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch); +int aml_layout_vinit(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, va_list data); +int aml_layout_init(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, 
...); +int aml_layout_acreate(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch); +int aml_layout_vcreate(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, va_list data); +int aml_layout_create(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...); + +void *aml_layout_deref(const struct aml_layout *l, ...); +int aml_layout_order(const struct aml_layout *l); +int aml_layout_dims(const struct aml_layout *l, ...); + +/******************************************************************************* + * Dense Layout Operators. + ******************************************************************************/ + +void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords); +void *aml_layout_column_aderef(const struct aml_layout_data *d, size_t *coords); +int aml_layout_column_order(const struct aml_layout_data *d); +int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims); +int aml_layout_column_adims(const struct aml_layout_data *d, const size_t *dims); + +extern struct aml_layout_ops aml_layout_column_ops; + +void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords); +void *aml_layout_row_aderef(const struct aml_layout_data *d, size_t *coords); +int aml_layout_row_order(const struct aml_layout_data *d); +int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims); +int aml_layout_row_adims(const struct aml_layout_data *d, const size_t *dims); +extern struct aml_layout_ops aml_layout_row_ops; #endif diff --git a/src/layout.c b/src/layout.c index f9d870d3..b8d3ebc7 100644 --- a/src/layout.c +++ b/src/layout.c @@ -1,42 +1,164 @@ #include -int aml_layout_struct_init(struct aml_layout *p, - size_t ndims, void *data_ptr) +/******************************************************************************* + * General API: common 
operators: + ******************************************************************************/ + +void *aml_layout_deref(const struct aml_layout *layout, ...) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + va_list ap; + void *ret; + va_start(ap, layout); + ret = layout->ops->deref(layout->data, ap); + va_end(ap); + return ret; +} + +int aml_layout_order(const struct aml_layout *layout) { - p->ndims = ndims; - p->dims = (size_t *)data_ptr; - p->pitch = p->dims + ndims; - p->stride = p->pitch + ndims; + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->order(layout->data); +} + +int aml_layout_dims(const struct aml_layout *layout, ...) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + va_list ap; + int ret; + va_start(ap, layout); + ret = layout->ops->dims(layout->data, ap); + va_end(ap); + return ret; +} + +/******************************************************************************* + * Layout initialization: + ******************************************************************************/ + +int aml_layout_struct_init(struct aml_layout *layout, + size_t ndims, void *memory) +{ + assert(layout == (struct aml_layout *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout)); + layout->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout_data)); + layout->data->ndims = ndims; + layout->data->dims = (size_t *)memory; + layout->data->pitch = layout->data->dims + ndims; + layout->data->stride = layout->data->pitch + ndims; return 0; } -int aml_layout_init(struct aml_layout *p, void *ptr, - size_t ndims, const size_t *dims, - const size_t *pitch, - const size_t *stride) +int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch) { - assert(p->ndims == ndims); - assert(p->dims); - assert(p->pitch); - assert(p->stride); - p->ptr = ptr; - 
memcpy(p->dims, dims, ndims * sizeof(size_t)); - memcpy(p->pitch, pitch, ndims * sizeof(size_t)); - memcpy(p->stride, stride, ndims * sizeof(size_t)); + assert(layout != NULL); + assert(layout->data != NULL); + struct aml_layout_data *data = layout->data; + assert(data->ndims == ndims); + assert(data->dims); + assert(data->pitch); + assert(data->stride); + data->ptr = ptr; + if(tags == AML_TYPE_LAYOUT_COLUMN_ORDER) + { + layout->tags = tags; + layout->ops = &aml_layout_column_ops; + for(size_t i = 0; i < ndims; i++) + { + data->dims[i] = dims[ndims-i-1]; + data->stride[i] = stride[ndims-i-1]; + } + data->pitch[0] = element_size; + for(size_t i = 1; i < ndims; i++) + data->pitch[i] = data->pitch[i-1]*pitch[ndims-i-1]; + } + else if(tags == AML_TYPE_LAYOUT_ROW_ORDER) + { + layout->tags = tags; + layout->ops = &aml_layout_row_ops; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + /* pitches are only necessary for ndims-1 dimensions. Since we + * store element size as p->pitch[0], there's still ndims + * elements in the array. + */ + data->pitch[0] = element_size; + for(size_t i = 1; i < ndims; i++) + data->pitch[i] = data->pitch[i-1]*pitch[i-1]; + memcpy(data->stride, stride, ndims * sizeof(size_t)); + } return 0; } -int aml_layout_create(struct aml_layout **p, void *ptr, - size_t ndims, const size_t *dims, - const size_t *pitch, - const size_t *stride) +int aml_layout_vinit(struct aml_layout *p, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, va_list ap) +{ + size_t dims[ndims]; + size_t stride[ndims]; + size_t pitch[ndims-1]; + for(size_t i = 0; i < ndims; i++) + dims[i] = va_arg(ap, size_t); + for(size_t i = 0; i < ndims; i++) + stride[i] = va_arg(ap, size_t); + for(size_t i = 0; i < ndims-1; i++) + pitch[i] = va_arg(ap, size_t); + return aml_layout_ainit(p, tags, ptr, element_size, ndims, dims, stride, + pitch); +} + +int aml_layout_init(struct aml_layout *p, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...) 
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_layout_vinit(p, tags, ptr, element_size, ndims, ap); + va_end(ap); + return err; +} + +int aml_layout_acreate(struct aml_layout **layout, uint64_t tags, void *ptr, + const size_t element_size, + size_t ndims, const size_t *dims, const size_t *stride, + const size_t *pitch) { assert(ndims > 0); void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims)); - *p = (struct aml_layout *)baseptr; - baseptr = (void *)((uintptr_t)baseptr + - sizeof(struct aml_layout)); - aml_layout_struct_init(*p, ndims, baseptr); - aml_layout_init(*p, ptr, ndims, dims, pitch, stride); + *layout = (struct aml_layout *)baseptr; + aml_layout_struct_init(*layout, ndims, baseptr); + aml_layout_init(*layout, tags, ptr, element_size, ndims, dims, stride, pitch); return 0; } + +int aml_layout_vcreate(struct aml_layout **layout, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, va_list ap) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_struct_init(*layout, ndims, baseptr); + return aml_layout_vinit(*layout, tags, ptr, element_size, ndims, ap); +} + +int aml_layout_create(struct aml_layout **layout, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_struct_init(*layout, ndims, baseptr); + va_start(ap, ndims); + err = aml_layout_vinit(*layout, tags, ptr, element_size, ndims, ap); + va_end(ap); + return err; +} diff --git a/src/layout_dense.c b/src/layout_dense.c new file mode 100644 index 00000000..abcaa5b3 --- /dev/null +++ b/src/layout_dense.c @@ -0,0 +1,77 @@ +#include + +/******************************************************************************* + * COLUMN OPERATORS: + ******************************************************************************/ + +void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords) +{ + return NULL; +} + +void *aml_layout_column_aderef(const struct aml_layout_data *d, size_t *coords) +{ + return NULL; +} + +int aml_layout_column_order(const struct aml_layout_data *d) +{ + return 0; +} + +int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims) +{ + return 0; +} + +int aml_layout_column_adims(const struct aml_layout_data *d, const size_t *dims) +{ + return 0; +} + +struct aml_layout_ops aml_layout_column_ops = { + aml_layout_column_deref, + aml_layout_column_aderef, + aml_layout_column_order, + aml_layout_column_dims, + aml_layout_column_adims, +}; + + +/******************************************************************************* + * ROW OPERATORS: + ******************************************************************************/ + +void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords) +{ + return NULL; +} + +void *aml_layout_row_aderef(const struct aml_layout_data *d, size_t *coords) +{ + return NULL; +} + +int aml_layout_row_order(const struct aml_layout_data *d) +{ + return 0; +} + +int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims) +{ + return 0; +} + +int aml_layout_row_adims(const struct aml_layout_data *d, const size_t *dims) +{ + 
return 0; +} + +struct aml_layout_ops aml_layout_row_ops = { + aml_layout_row_deref, + aml_layout_row_aderef, + aml_layout_row_order, + aml_layout_row_dims, + aml_layout_row_adims, +}; + From 5b9b77ee66ca3e38aabd64c63e268af196fb0257 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 7 Dec 2018 21:30:32 -0600 Subject: [PATCH 04/47] Bugfix for layout. --- src/layout.c | 6 +++--- tests/layout.c | 26 +++++++++++++++++--------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/layout.c b/src/layout.c index b8d3ebc7..eef22bbc 100644 --- a/src/layout.c +++ b/src/layout.c @@ -50,8 +50,8 @@ int aml_layout_struct_init(struct aml_layout *layout, sizeof(struct aml_layout_data)); layout->data->ndims = ndims; layout->data->dims = (size_t *)memory; - layout->data->pitch = layout->data->dims + ndims; - layout->data->stride = layout->data->pitch + ndims; + layout->data->stride = layout->data->dims + ndims; + layout->data->pitch = layout->data->stride + ndims; return 0; } @@ -134,7 +134,7 @@ int aml_layout_acreate(struct aml_layout **layout, uint64_t tags, void *ptr, void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims)); *layout = (struct aml_layout *)baseptr; aml_layout_struct_init(*layout, ndims, baseptr); - aml_layout_init(*layout, tags, ptr, element_size, ndims, dims, stride, pitch); + aml_layout_ainit(*layout, tags, ptr, element_size, ndims, dims, stride, pitch); return 0; } diff --git a/tests/layout.c b/tests/layout.c index d0a729f6..cef525c7 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -9,23 +9,31 @@ int main(int argc, char *argv[]) /* padd the dims to the closest multiple of 2 */ float memory[4][4][8][12][16]; size_t dims[5] = {2,3,7,11,13}; - size_t pitch[5] = {4, 4*4, 4*4*4, 4*4*4*8, 4*4*4*8*12}; + size_t cpitch[5] = {4, 4*4, 4*4*4, 4*4*4*8, 4*4*4*8*12}; + size_t pitch[4] = {4, 4, 8, 12}; size_t stride[5] = {1,1,1,1,1}; /* library initialization */ aml_init(&argc, &argv); /* initialize the layouts */ - aml_layout_create(&a, (void *)memory, 5, 
dims, pitch, stride); - aml_layout_init(&b, (void *)memory, 5, dims, pitch, stride); + aml_layout_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + sizeof(float), 5, dims, stride, pitch); + aml_layout_ainit(&b, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + sizeof(float), 5, dims, stride, pitch); + + assert( (intptr_t)(a->data->stride) - (intptr_t)(a->data->dims) + == 5*sizeof(size_t) ); + assert( (intptr_t)(a->data->pitch) - (intptr_t)(a->data->dims) + == 10*sizeof(size_t) ); /* some simple checks */ - assert(!memcmp(a->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(a->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(a->stride, stride, sizeof(size_t)*5)); - assert(!memcmp(b.dims, dims, sizeof(size_t)*5)); - assert(!memcmp(b.pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(b.stride, stride, sizeof(size_t)*5)); + assert(!memcmp(a->data->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(a->data->pitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(a->data->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(b.data->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(b.data->pitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(b.data->stride, stride, sizeof(size_t)*5)); free(a); From 01ed44a4679ec4e5c4fe33a01cb658957714caba Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 7 Dec 2018 21:31:26 -0600 Subject: [PATCH 05/47] Adapted copy operators. For now transforms are expressed in row major mode... 
--- src/copy.c | 39 ++++++++++++++------------ tests/copy.c | 78 ++++++++++++++++++++-------------------------------- 2 files changed, 52 insertions(+), 65 deletions(-) diff --git a/src/copy.c b/src/copy.c index 04204c45..dbad3930 100644 --- a/src/copy.c +++ b/src/copy.c @@ -479,37 +479,42 @@ int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, int aml_copy_layout(struct aml_layout *dst, const struct aml_layout *src) { - size_t d = src->ndims; + struct aml_layout_data *ddst = dst->data; + struct aml_layout_data *dsrc = src->data; + size_t d = dsrc->ndims; assert(d > 0); - size_t elem_size = src->pitch[0]; - assert(d == dst->ndims); - assert(elem_size == dst->pitch[0]); + size_t elem_size = dsrc->pitch[0]; + assert(d == ddst->ndims); + assert(elem_size == ddst->pitch[0]); for (int i = 0; i < d; i++) - assert( src->dims[i] == dst->dims[i] ); - return aml_copy_ndstr_c(d, dst->ptr, dst->pitch, dst->stride, src->ptr, - src->pitch, src->stride, src->dims, elem_size); + assert( dsrc->dims[i] == ddst->dims[i] ); + return aml_copy_ndstr_c(d, ddst->ptr, ddst->pitch, ddst->stride, + dsrc->ptr, dsrc->pitch, dsrc->stride, + dsrc->dims, elem_size); } int aml_transform_layout(struct aml_layout *dst, const struct aml_layout *src, const size_t *target_dims) { - size_t d = src->ndims; + struct aml_layout_data *ddst = dst->data; + struct aml_layout_data *dsrc = src->data; + size_t d = dsrc->ndims; assert(d > 0); - size_t elem_size = src->pitch[0]; - assert(d == dst->ndims); - assert(elem_size == dst->pitch[0]); + size_t elem_size = dsrc->pitch[0]; + assert(d == ddst->ndims); + assert(elem_size == ddst->pitch[0]); for (int i = 0; i < d; i++) - assert( src->dims[i] == dst->dims[target_dims[i]]); - return aml_copy_shndstr_c(d, target_dims, dst->ptr, dst->pitch, - dst->stride, src->ptr, src->pitch, - src->stride, src->dims, elem_size); + assert( dsrc->dims[i] == ddst->dims[target_dims[i]]); + return aml_copy_shndstr_c(d, target_dims, ddst->ptr, ddst->pitch, + 
ddst->stride, dsrc->ptr, dsrc->pitch, + dsrc->stride, dsrc->dims, elem_size); } int aml_transpose_layout(struct aml_layout *dst, const struct aml_layout *src) { - size_t d = src->ndims; + size_t d = src->data->ndims; assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); @@ -522,7 +527,7 @@ int aml_transpose_layout(struct aml_layout *dst, const struct aml_layout *src) int aml_reverse_transpose_layout(struct aml_layout *dst, const struct aml_layout *src) { - size_t d = src->ndims; + size_t d = src->data->ndims; assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); diff --git a/tests/copy.c b/tests/copy.c index ffa12971..26f12be1 100644 --- a/tests/copy.c +++ b/tests/copy.c @@ -847,9 +847,9 @@ void test_copy_sh4dstr_c(void) void test_copy_layout(void) { size_t elem_number[3] = { 5, 3, 2 }; - size_t c_src_pitch[4] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4 }; + size_t c_src_pitch[3] = { 10, 6, 4 }; size_t src_stride[3] = { 1, 1, 1}; - size_t c_dst_pitch[4] = { 8, 8 * 5, 8 * 5 * 3, 8 * 5 * 3 * 2 }; + size_t c_dst_pitch[3] = { 5, 3, 2 }; size_t dst_stride[3] = { 1, 1, 1}; double src[4][6][10]; @@ -859,27 +859,19 @@ void test_copy_layout(void) double ref_dst2[4][6][10]; double ref_dst[2][3][5]; - struct aml_layout src_layout = { - (void *)src, - 3, - elem_number, - c_src_pitch, - src_stride - }; - struct aml_layout dst_layout = { - (void *)dst, - 3, - elem_number, - c_dst_pitch, - dst_stride - }; - struct aml_layout dst2_layout = { - (void *)dst2, - 3, - elem_number, - c_src_pitch, - src_stride - }; + AML_LAYOUT_DECL(src_layout, 3); + AML_LAYOUT_DECL(dst_layout, 3); + AML_LAYOUT_DECL(dst2_layout, 3); + + aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_ROW_ORDER, + 
(void *)dst2, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); for (int k = 0; k < 4; k++) for (int j = 0; j < 6; j++) @@ -915,11 +907,9 @@ void test_transpose_layout(void) { size_t elem_number[4] = { 5, 3, 2, 4 }; size_t elem_number2[4] = { 3, 2, 4, 5 }; - size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, - 8 * 10 * 6 * 4 * 8 }; + size_t c_src_pitch[4] = { 10, 6, 4, 8 }; size_t src_stride[4] = { 2, 2, 2, 2 }; - size_t c_dst_pitch[5] = { 8, 8 * 3, 8 * 3 * 2, 8 * 3 * 2 * 4, - 8 * 3 * 2 * 4 * 5 }; + size_t c_dst_pitch[4] = { 3, 2, 4, 5 }; size_t dst_stride[4] = { 1, 1, 1, 1 }; double src[8][4][6][10]; @@ -929,27 +919,19 @@ void test_transpose_layout(void) double ref_dst[5][4][2][3]; double ref_dst2[8][4][6][10]; - struct aml_layout src_layout = { - (void *)src, - 4, - elem_number, - c_src_pitch, - src_stride - }; - struct aml_layout dst_layout = { - (void *)dst, - 4, - elem_number2, - c_dst_pitch, - dst_stride - }; - struct aml_layout dst2_layout = { - (void *)dst2, - 4, - elem_number, - c_src_pitch, - src_stride - }; + AML_LAYOUT_DECL(src_layout, 4); + AML_LAYOUT_DECL(dst_layout, 4); + AML_LAYOUT_DECL(dst2_layout, 4); + + aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)dst2, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); for (int l = 0; l < 8; l++) for (int k = 0; k < 4; k++) From 163ddf93fc2d30e44397317a25762b4aa446e684 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Mon, 10 Dec 2018 14:02:44 -0600 Subject: [PATCH 06/47] [refactor] use a bitfield to add type information We might use an actual bitfield later on, but for now using a set of macros is fine.
--- src/aml-layout.h | 16 ++++++++++++---- src/layout.c | 9 +++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index e88a8cec..e0a08c47 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -12,6 +12,18 @@ struct aml_layout_data; * Generic layout, with support for sparsity and strides. ******************************************************************************/ +/* Layout type tags. Defined as the bit offset to set to one. */ +#define AML_TYPE_LAYOUT_ORDER (1 << 0) +#define AML_TYPE_MAX (1 << 1) + +#define AML_TYPE_LAYOUT_ROW_ORDER 0 +#define AML_TYPE_LAYOUT_COLUMN_ORDER 1 + +#define AML_TYPE_GET(tags, bit) (tags & bit) +#define AML_TYPE_CLEAR(tags, bit) (tags & ~bit) +#define AML_TYPE_SET(tags, bit) (tags | bit) + + /* Layout: describes how a multi-dimensional data structure is collapsed into a * linear (and contiguous) virtual address range. * "ptr": base pointer of the address range @@ -22,10 +34,6 @@ struct aml_layout_data; * (pitch[0] is the element size in bytes). * "stride": offset between elements of the same dimension. 
*/ - -#define AML_TYPE_LAYOUT_COLUMN_ORDER 0 -#define AML_TYPE_LAYOUT_ROW_ORDER 1 - struct aml_layout_data { void *ptr; size_t ndims; diff --git a/src/layout.c b/src/layout.c index eef22bbc..7854451f 100644 --- a/src/layout.c +++ b/src/layout.c @@ -68,9 +68,10 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, assert(data->pitch); assert(data->stride); data->ptr = ptr; - if(tags == AML_TYPE_LAYOUT_COLUMN_ORDER) + int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); + if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) { - layout->tags = tags; + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); layout->ops = &aml_layout_column_ops; for(size_t i = 0; i < ndims; i++) { @@ -81,9 +82,9 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, for(size_t i = 1; i < ndims; i++) data->pitch[i] = data->pitch[i-1]*pitch[ndims-i-1]; } - else if(tags == AML_TYPE_LAYOUT_ROW_ORDER) + else if(type == AML_TYPE_LAYOUT_ROW_ORDER) { - layout->tags = tags; + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ROW_ORDER); layout->ops = &aml_layout_row_ops; memcpy(data->dims, dims, ndims * sizeof(size_t)); /* pitches are only necessary for ndims-1 dimensions. Since we From ab9d6617427f60030f1b9f974b2188a53dee12d0 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Mon, 10 Dec 2018 14:23:48 -0600 Subject: [PATCH 07/47] [refactor] better names for copy_layout functions Part of the copy namespace... 
--- src/aml-copy.h | 17 ++++++++++++----- src/copy.c | 12 ++++++------ tests/copy.c | 8 ++++---- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/aml-copy.h b/src/aml-copy.h index ecf4c6ff..d7877350 100644 --- a/src/aml-copy.h +++ b/src/aml-copy.h @@ -167,11 +167,18 @@ int aml_copy_shndstr_c(size_t d, const size_t *target_dims, void *dst, const size_t *src_stride, const size_t *elem_number, const size_t elem_size); -int aml_copy_layout(struct aml_layout *dst, const struct aml_layout *src); -int aml_transform_layout(struct aml_layout *dst, const struct aml_layout *src, - const size_t *target_dims); -int aml_transpose_layout(struct aml_layout *dst, const struct aml_layout *src); -int aml_reverse_transpose_layout(struct aml_layout *dst, + /******************************************************************************* + * Generic building block API: Native version + * Native means using AML-internal layouts. + ******************************************************************************/ + +int aml_copy_layout_native(struct aml_layout *dst, + const struct aml_layout *src); +int aml_copy_layout_transform_native(struct aml_layout *dst, + const struct aml_layout *src, + const size_t *target_dims); +int aml_copy_layout_transpose_native(struct aml_layout *dst, const struct aml_layout *src); +int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, const struct aml_layout *src); #endif diff --git a/src/copy.c b/src/copy.c index dbad3930..cafbb120 100644 --- a/src/copy.c +++ b/src/copy.c @@ -477,7 +477,7 @@ int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, return 0; } -int aml_copy_layout(struct aml_layout *dst, const struct aml_layout *src) +int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) { struct aml_layout_data *ddst = dst->data; struct aml_layout_data *dsrc = src->data; @@ -494,7 +494,7 @@ int aml_copy_layout(struct aml_layout *dst, const struct aml_layout *src) dsrc->dims, 
elem_size); } -int aml_transform_layout(struct aml_layout *dst, const struct aml_layout *src, +int aml_copy_layout_transform_native(struct aml_layout *dst, const struct aml_layout *src, const size_t *target_dims) { struct aml_layout_data *ddst = dst->data; @@ -512,7 +512,7 @@ int aml_transform_layout(struct aml_layout *dst, const struct aml_layout *src, dsrc->stride, dsrc->dims, elem_size); } -int aml_transpose_layout(struct aml_layout *dst, const struct aml_layout *src) +int aml_copy_layout_transpose_native(struct aml_layout *dst, const struct aml_layout *src) { size_t d = src->data->ndims; assert(d > 0); @@ -521,10 +521,10 @@ int aml_transpose_layout(struct aml_layout *dst, const struct aml_layout *src) target_dims[0] = d - 1; for (int i = 1; i < d; i++) target_dims[i] = i - 1; - return aml_transform_layout(dst, src, target_dims); + return aml_copy_layout_transform_native(dst, src, target_dims); } -int aml_reverse_transpose_layout(struct aml_layout *dst, +int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, const struct aml_layout *src) { size_t d = src->data->ndims; @@ -534,5 +534,5 @@ int aml_reverse_transpose_layout(struct aml_layout *dst, target_dims[d - 1] = 0; for (int i = 0; i < d - 1; i++) target_dims[i] = i + 1; - return aml_transform_layout(dst, src, target_dims); + return aml_copy_layout_transform_native(dst, src, target_dims); } diff --git a/tests/copy.c b/tests/copy.c index 26f12be1..43df708c 100644 --- a/tests/copy.c +++ b/tests/copy.c @@ -889,13 +889,13 @@ void test_copy_layout(void) ref_dst2[k][j][i] = src[k][j][i]; } - aml_copy_layout(&dst_layout, &src_layout); + aml_copy_layout_native(&dst_layout, &src_layout); for (int k = 0; k < 2; k++) for (int j = 0; j < 3; j++) for (int i = 0; i < 5; i++) assert(ref_dst[k][j][i] == dst[k][j][i]); - aml_copy_layout(&dst2_layout, &dst_layout); + aml_copy_layout_native(&dst2_layout, &dst_layout); for (int k = 0; k < 4; k++) for (int j = 0; j < 6; j++) for (int i = 0; i < 10; i++) @@ -954,7 
+954,7 @@ void test_transpose_layout(void) src[2 * l][2 * k][2 * j][2 * i]; } - aml_transpose_layout(&dst_layout, &src_layout); + aml_copy_layout_transpose_native(&dst_layout, &src_layout); for (int l = 0; l < 4; l++) for (int k = 0; k < 2; k++) for (int j = 0; j < 3; j++) @@ -962,7 +962,7 @@ void test_transpose_layout(void) assert(ref_dst[i][l][k][j] == dst[i][l][k][j]); - aml_reverse_transpose_layout(&dst2_layout, &dst_layout); + aml_copy_layout_reverse_transpose_native(&dst2_layout, &dst_layout); for (int l = 0; l < 8; l++) for (int k = 0; k < 4; k++) for (int j = 0; j < 6; j++) From 55de883c70b08c6548388485738a36f3ab691b3d Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 10 Dec 2018 15:17:33 -0600 Subject: [PATCH 08/47] Change arguments of transform (transpose) to match those of Python and Ruby. --- src/copy.c | 150 ++++++++++++++++++++++++++------------------------- tests/copy.c | 16 +++--- 2 files changed, 86 insertions(+), 80 deletions(-) diff --git a/src/copy.c b/src/copy.c index cafbb120..bf6ab96b 100644 --- a/src/copy.c +++ b/src/copy.c @@ -183,14 +183,15 @@ static void aml_copy_sh2d_helper(const size_t *target_dims, void *dst, const size_t *elem_number, const size_t elem_size) { - for (int j = 0; j < elem_number[1]; j++) - for (int i = 0; i < elem_number[0]; i++) + for (int j = 0; j < elem_number[target_dims[1]]; j++) + for (int i = 0; i < elem_number[target_dims[0]]; i++) memcpy((void *)((uintptr_t) dst + - i * cumul_dst_pitch[target_dims[0]] + - j * cumul_dst_pitch[target_dims[1]]), + i * cumul_dst_pitch[0] + + j * cumul_dst_pitch[1]), (void *)((uintptr_t) src + - i * cumul_src_pitch[0] + - j * cumul_src_pitch[1]), elem_size); + i * cumul_src_pitch[target_dims[0]] + + j * cumul_src_pitch[target_dims[1]]), + elem_size); } static void aml_copy_shnd_helper(size_t d, const size_t *target_dims, @@ -201,26 +202,28 @@ static void aml_copy_shnd_helper(size_t d, const size_t *target_dims, const size_t elem_size) { if (d == 1) - for (int i = 0; i < 
elem_number[0]; i++) + for (int i = 0; i < elem_number[target_dims[0]]; i++) memcpy((void *)((uintptr_t) dst + - i * cumul_dst_pitch[target_dims[0]]), + i * cumul_dst_pitch[0]), (void *)((uintptr_t) src + - i * cumul_src_pitch[0]), elem_size); + i * cumul_src_pitch[target_dims[0]]), + elem_size); if (d == 2) aml_copy_sh2d_helper(target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); else { // process dimension d-1 - for (int i = 0; i < elem_number[d - 1]; i++) { + for (int i = 0; i < elem_number[target_dims[d - 1]]; i++) { aml_copy_shnd_helper(d - 1, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); dst = (void *)((uintptr_t) dst + - cumul_dst_pitch[target_dims[d - 1]]); + cumul_dst_pitch[d - 1]); src = - (void *)((uintptr_t) src + cumul_src_pitch[d - 1]); + (void *)((uintptr_t) src + + cumul_src_pitch[target_dims[d - 1]]); } } } @@ -233,19 +236,19 @@ int aml_copy_shnd_c(size_t d, const size_t *target_dims, void *dst, assert(d > 0); size_t present_dims = 0; - for (int i = 0; i < d; i++) { + for (int i = 0; i < d - 1; i++) { assert(target_dims[i] < d); - if (target_dims[i] < d - 1) - assert(cumul_dst_pitch[target_dims[i] + 1] >= - cumul_dst_pitch[target_dims[i]] * - elem_number[i]); + assert(cumul_src_pitch[i + 1] >= + cumul_src_pitch[i] * elem_number[i]); present_dims |= 1 << target_dims[i]; } + assert(target_dims[d - 1] < d); + present_dims |= 1 << target_dims[d - 1]; for (int i = 0; i < d; i++) assert(present_dims & (1 << i)); for (int i = 0; i < d - 1; i++) - assert(cumul_src_pitch[i + 1] >= cumul_src_pitch[i] * - elem_number[i]); + assert(cumul_dst_pitch[i + 1] >= + cumul_dst_pitch[i] * elem_number[target_dims[i]]); aml_copy_shnd_helper(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; @@ -274,9 +277,9 @@ int aml_copy_tnd(size_t d, void *dst, const size_t *dst_pitch, const void *src, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); 
- target_dims[0] = d - 1; - for (int i = 1; i < d; i++) - target_dims[i] = i - 1; + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, elem_number, elem_size); return 0; @@ -289,9 +292,9 @@ int aml_copy_tnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[0] = d - 1; - for (int i = 1; i < d; i++) - target_dims[i] = i - 1; + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; @@ -304,9 +307,9 @@ int aml_copy_rtnd(size_t d, void *dst, const size_t *dst_pitch, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) - target_dims[i] = i + 1; + target_dims[0] = d - 1; + for (int i = 1; i < d; i++) + target_dims[i] = i - 1; aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, elem_number, elem_size); return 0; @@ -319,9 +322,9 @@ int aml_copy_rtnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) - target_dims[i] = i + 1; + target_dims[0] = d - 1; + for (int i = 1; i < d; i++) + target_dims[i] = i - 1; aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; @@ -336,16 +339,17 @@ static void aml_copy_shndstr_helper(size_t d, const size_t *target_dims, const size_t elem_size) { if (d == 1) - for (int i = 0; i < elem_number[0]; i++) + for (int i = 0; i < elem_number[target_dims[0]]; i++) memcpy((void *)((uintptr_t) dst + - i * cumul_dst_pitch[target_dims[0]] * - dst_stride[target_dims[0]]), + i * cumul_dst_pitch[0] * + dst_stride[0]), (void *)((uintptr_t) src + - i * 
cumul_src_pitch[0] * src_stride[0]), + i * cumul_src_pitch[target_dims[0]] * + src_stride[target_dims[0]]), elem_size); else { // process dimension d-1 - for (int i = 0; i < elem_number[d - 1]; i++) { + for (int i = 0; i < elem_number[target_dims[d - 1]]; i++) { aml_copy_shndstr_helper(d - 1, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, @@ -353,12 +357,12 @@ static void aml_copy_shndstr_helper(size_t d, const size_t *target_dims, elem_size); dst = (void *)((uintptr_t) dst + - cumul_dst_pitch[target_dims[d - 1]] * - dst_stride[target_dims[d - 1]]); + cumul_dst_pitch[d - 1] * + dst_stride[d - 1]); src = (void *)((uintptr_t) src + - cumul_src_pitch[d - 1] * src_stride[d - - 1]); + cumul_src_pitch[target_dims[d - 1]] * + src_stride[target_dims[d - 1]]); } } } @@ -372,21 +376,21 @@ int aml_copy_shndstr_c(size_t d, const size_t *target_dims, void *dst, assert(d > 0); size_t present_dims = 0; - for (int i = 0; i < d; i++) { + for (int i = 0; i < d - 1; i++) { assert(target_dims[i] < d); - if (target_dims[i] < d - 1) - assert(cumul_dst_pitch[target_dims[i] + 1] >= - cumul_dst_pitch[target_dims[i]] * - elem_number[i] * - dst_stride[target_dims[i]]); + assert(cumul_src_pitch[i + 1] >= + cumul_src_pitch[i] * elem_number[i] * + src_stride[i]); present_dims |= 1 << target_dims[i]; } + assert(target_dims[d - 1] < d); + present_dims |= 1 << target_dims[d - 1]; for (int i = 0; i < d; i++) assert(present_dims & (1 << i)); for (int i = 0; i < d - 1; i++) - assert(cumul_src_pitch[i + 1] >= cumul_src_pitch[i] * - elem_number[i] * - src_stride[i]); + assert(cumul_dst_pitch[i + 1] >= + cumul_dst_pitch[i] * elem_number[target_dims[i]] * + dst_stride[i]); aml_copy_shndstr_helper(d, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); @@ -419,9 +423,9 @@ int aml_copy_tndstr(size_t d, void *dst, const size_t *dst_pitch, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[0] = d 
- 1; - for (int i = 1; i < d; i++) - target_dims[i] = i - 1; + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, src_pitch, src_stride, elem_number, elem_size); return 0; @@ -435,9 +439,9 @@ int aml_copy_tndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[0] = d - 1; - for (int i = 1; i < d; i++) - target_dims[i] = i - 1; + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); @@ -452,9 +456,9 @@ int aml_copy_rtndstr(size_t d, void *dst, const size_t *dst_pitch, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) - target_dims[i] = i + 1; + target_dims[0] = d - 1; + for (int i = 1; i < d; i++) + target_dims[i] = i - 1; aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, src_pitch, src_stride, elem_number, elem_size); return 0; @@ -468,9 +472,9 @@ int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) - target_dims[i] = i + 1; + target_dims[0] = d - 1; + for (int i = 1; i < d; i++) + target_dims[i] = i - 1; aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); @@ -494,8 +498,9 @@ int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) dsrc->dims, elem_size); } -int aml_copy_layout_transform_native(struct aml_layout *dst, const struct aml_layout *src, - const size_t *target_dims) +int aml_copy_layout_transform_native(struct aml_layout *dst, + const struct aml_layout 
*src, + const size_t *target_dims) { struct aml_layout_data *ddst = dst->data; struct aml_layout_data *dsrc = src->data; @@ -506,33 +511,34 @@ int aml_copy_layout_transform_native(struct aml_layout *dst, const struct aml_la assert(d == ddst->ndims); assert(elem_size == ddst->pitch[0]); for (int i = 0; i < d; i++) - assert( dsrc->dims[i] == ddst->dims[target_dims[i]]); + assert( dsrc->dims[target_dims[i]] == ddst->dims[i]); return aml_copy_shndstr_c(d, target_dims, ddst->ptr, ddst->pitch, ddst->stride, dsrc->ptr, dsrc->pitch, dsrc->stride, dsrc->dims, elem_size); } -int aml_copy_layout_transpose_native(struct aml_layout *dst, const struct aml_layout *src) +int aml_copy_layout_transpose_native(struct aml_layout *dst, + const struct aml_layout *src) { size_t d = src->data->ndims; assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[0] = d - 1; - for (int i = 1; i < d; i++) - target_dims[i] = i - 1; + target_dims[d - 1] = 0; + for (int i = 0; i < d - 1; i++) + target_dims[i] = i + 1; return aml_copy_layout_transform_native(dst, src, target_dims); } int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, - const struct aml_layout *src) + const struct aml_layout *src) { size_t d = src->data->ndims; assert(d > 0); size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) - target_dims[i] = i + 1; + target_dims[0] = d - 1; + for (int i = 1; i < d; i++) + target_dims[i] = i - 1; return aml_copy_layout_transform_native(dst, src, target_dims); } diff --git a/tests/copy.c b/tests/copy.c index 43df708c..97beb3ee 100644 --- a/tests/copy.c +++ b/tests/copy.c @@ -610,8 +610,8 @@ void test_copy_sh4d(void) { size_t elem_number[4] = { 5, 3, 2, 4 }; size_t elem_number2[4] = { 2, 3, 4, 5 }; - size_t target_dims[4] = { 3, 1, 0, 2 }; - size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; size_t 
src_pitch[4] = { 10, 6, 4, 8 }; size_t dst_pitch[4] = { 2, 3, 4, 5 }; @@ -666,8 +666,8 @@ void test_copy_sh4d_c(void) { size_t elem_number[4] = { 5, 3, 2, 4 }; size_t elem_number2[4] = { 2, 3, 4, 5 }; - size_t target_dims[4] = { 3, 1, 0, 2 }; - size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, 8 * 10 * 6 * 4 * 8 }; size_t c_dst_pitch[5] = { 8, 8 * 2, 8 * 2 * 3, 8 * 2 * 3 * 4, @@ -724,8 +724,8 @@ void test_copy_sh4dstr(void) { size_t elem_number[4] = { 5, 3, 2, 4 }; size_t elem_number2[4] = { 2, 3, 4, 5 }; - size_t target_dims[4] = { 3, 1, 0, 2 }; - size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; size_t src_pitch[4] = { 10, 6, 4, 8 }; size_t src_stride[4] = { 2, 2, 2, 2 }; size_t dst_pitch[4] = { 2, 3, 4, 5 }; @@ -784,8 +784,8 @@ void test_copy_sh4dstr_c(void) { size_t elem_number[4] = { 5, 3, 2, 4 }; size_t elem_number2[4] = { 2, 3, 4, 5 }; - size_t target_dims[4] = { 3, 1, 0, 2 }; - size_t target_dims2[4] = { 2, 1, 3, 0 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, 8 * 10 * 6 * 4 * 8 }; size_t src_stride[4] = { 2, 2, 2, 2 }; From bc476470f3e819d47b74a4586cda907a47f2e81d Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 10 Dec 2018 16:02:00 -0600 Subject: [PATCH 09/47] Swapped column and row to match the classical notations. 
--- src/layout.c | 8 ++++---- tests/copy.c | 12 ++++++------ tests/layout.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/layout.c b/src/layout.c index 7854451f..22931837 100644 --- a/src/layout.c +++ b/src/layout.c @@ -69,9 +69,9 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, assert(data->stride); data->ptr = ptr; int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); - if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) + if(type == AML_TYPE_LAYOUT_ROW_ORDER) { - AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ROW_ORDER); layout->ops = &aml_layout_column_ops; for(size_t i = 0; i < ndims; i++) { @@ -82,9 +82,9 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, for(size_t i = 1; i < ndims; i++) data->pitch[i] = data->pitch[i-1]*pitch[ndims-i-1]; } - else if(type == AML_TYPE_LAYOUT_ROW_ORDER) + else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) { - AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ROW_ORDER); + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); layout->ops = &aml_layout_row_ops; memcpy(data->dims, dims, ndims * sizeof(size_t)); /* pitches are only necessary for ndims-1 dimensions. 
Since we diff --git a/tests/copy.c b/tests/copy.c index 97beb3ee..c85b6e39 100644 --- a/tests/copy.c +++ b/tests/copy.c @@ -863,13 +863,13 @@ void test_copy_layout(void) AML_LAYOUT_DECL(dst_layout, 3); AML_LAYOUT_DECL(dst2_layout, 3); - aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_ROW_ORDER, + aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)src, sizeof(double), 3, elem_number, src_stride, c_src_pitch); - aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_ROW_ORDER, + aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)dst, sizeof(double), 3, elem_number, dst_stride, c_dst_pitch); - aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_ROW_ORDER, + aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)dst2, sizeof(double), 3, elem_number, src_stride, c_src_pitch); @@ -923,13 +923,13 @@ void test_transpose_layout(void) AML_LAYOUT_DECL(dst_layout, 4); AML_LAYOUT_DECL(dst2_layout, 4); - aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_ROW_ORDER, + aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)src, sizeof(double), 4, elem_number, src_stride, c_src_pitch); - aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_ROW_ORDER, + aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)dst, sizeof(double), 4, elem_number2, dst_stride, c_dst_pitch); - aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_ROW_ORDER, + aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)dst2, sizeof(double), 4, elem_number, src_stride, c_src_pitch); diff --git a/tests/layout.c b/tests/layout.c index cef525c7..bab44d71 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -17,9 +17,9 @@ int main(int argc, char *argv[]) aml_init(&argc, &argv); /* initialize the layouts */ - aml_layout_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + aml_layout_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)memory, sizeof(float), 5, dims, stride, pitch); - aml_layout_ainit(&b, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + 
aml_layout_ainit(&b, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)memory, sizeof(float), 5, dims, stride, pitch); assert( (intptr_t)(a->data->stride) - (intptr_t)(a->data->dims) From 9c05ce63968e95a159b16269c276248c31cccda4 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Thu, 13 Dec 2018 10:01:53 -0600 Subject: [PATCH 10/47] [feature] implement layout dense internals --- src/aml-layout.h | 6 ++-- src/layout_dense.c | 77 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index e0a08c47..e01c4b94 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -47,7 +47,7 @@ struct aml_layout_ops { void *(*aderef)(const struct aml_layout_data *, const size_t *coords); int (*order)(const struct aml_layout_data *); int (*dims)(const struct aml_layout_data *, va_list dim_ptrs); - int (*adims)(const struct aml_layout_data *, const size_t *dims); + int (*adims)(const struct aml_layout_data *, size_t *dims); }; struct aml_layout { @@ -105,7 +105,7 @@ void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords); void *aml_layout_column_aderef(const struct aml_layout_data *d, size_t *coords); int aml_layout_column_order(const struct aml_layout_data *d); int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims); -int aml_layout_column_adims(const struct aml_layout_data *d, const size_t *dims); +int aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims); extern struct aml_layout_ops aml_layout_column_ops; @@ -113,7 +113,7 @@ void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords); void *aml_layout_row_aderef(const struct aml_layout_data *d, size_t *coords); int aml_layout_row_order(const struct aml_layout_data *d); int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims); -int aml_layout_row_adims(const struct aml_layout_data *d, const size_t *dims); +int aml_layout_row_adims(const struct aml_layout_data *d, 
size_t *dims); extern struct aml_layout_ops aml_layout_row_ops; #endif diff --git a/src/layout_dense.c b/src/layout_dense.c index abcaa5b3..f4e0a0de 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -6,26 +6,55 @@ void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords) { - return NULL; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; i < d->ndims; i++) + { + size_t c = va_arg(coords, size_t); + assert(c < d->dims[i]); + ptr += c*d->pitch[i]*d->stride[i]; + } + return ptr; } void *aml_layout_column_aderef(const struct aml_layout_data *d, size_t *coords) { - return NULL; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; i < d->ndims; i++) + { + assert(coords[i] < d->dims[i]); + ptr += coords[i]*d->pitch[i]*d->stride[i]; + } + return ptr; } int aml_layout_column_order(const struct aml_layout_data *d) { - return 0; + return AML_TYPE_LAYOUT_COLUMN_ORDER; } int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims) { + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } return 0; } -int aml_layout_column_adims(const struct aml_layout_data *d, const size_t *dims) +int aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims) { + assert(d != NULL); + assert(dims != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); return 0; } @@ -44,26 +73,58 @@ struct aml_layout_ops aml_layout_column_ops = { void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords) { - return NULL; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; i < d->ndims; i++) + { + size_t c = va_arg(coords, size_t); + assert(c < d->dims[d->ndims - i - 1]); + ptr += c*d->pitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; + } + return ptr; } void *aml_layout_row_aderef(const struct 
aml_layout_data *d, size_t *coords) { - return NULL; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; i < d->ndims; i++) + { + size_t c = coords[i]; + assert(c < d->dims[d->ndims - i - 1]); + ptr += c*d->pitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; + } + return ptr; } int aml_layout_row_order(const struct aml_layout_data *d) { - return 0; + return AML_TYPE_LAYOUT_ROW_ORDER; } int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims) { + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } return 0; } -int aml_layout_row_adims(const struct aml_layout_data *d, const size_t *dims) +int aml_layout_row_adims(const struct aml_layout_data *d, size_t *dims) { + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + dims[i] = d->dims[d->ndims - i - 1]; + } return 0; } From 9238feb748f17379df5e2d40310be16dc9c4741e Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 13 Dec 2018 11:15:07 -0600 Subject: [PATCH 11/47] Added tests for column and row major layouts. 
--- src/aml-layout.h | 8 +++- src/layout.c | 24 +++++++++--- src/layout_dense.c | 4 +- tests/layout.c | 92 ++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 111 insertions(+), 17 deletions(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index e01c4b94..0ffcf252 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -94,15 +94,18 @@ int aml_layout_create(struct aml_layout **l, uint64_t tags, void *ptr, const size_t element_size, size_t ndims, ...); void *aml_layout_deref(const struct aml_layout *l, ...); +void *aml_layout_aderef(const struct aml_layout *l, const size_t *coords); int aml_layout_order(const struct aml_layout *l); int aml_layout_dims(const struct aml_layout *l, ...); +int aml_layout_adims(const struct aml_layout *l, size_t *dims); /******************************************************************************* * Dense Layout Operators. ******************************************************************************/ void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords); -void *aml_layout_column_aderef(const struct aml_layout_data *d, size_t *coords); +void *aml_layout_column_aderef(const struct aml_layout_data *d, + const size_t *coords); int aml_layout_column_order(const struct aml_layout_data *d); int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims); int aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims); @@ -110,7 +113,8 @@ int aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims); extern struct aml_layout_ops aml_layout_column_ops; void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords); -void *aml_layout_row_aderef(const struct aml_layout_data *d, size_t *coords); +void *aml_layout_row_aderef(const struct aml_layout_data *d, + const size_t *coords); int aml_layout_row_order(const struct aml_layout_data *d); int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims); int aml_layout_row_adims(const struct 
aml_layout_data *d, size_t *dims); diff --git a/src/layout.c b/src/layout.c index 22931837..174881e5 100644 --- a/src/layout.c +++ b/src/layout.c @@ -16,6 +16,13 @@ void *aml_layout_deref(const struct aml_layout *layout, ...) return ret; } +void *aml_layout_aderef(const struct aml_layout *layout, const size_t *coords) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->aderef(layout->data, coords); +} + int aml_layout_order(const struct aml_layout *layout) { assert(layout != NULL); @@ -35,6 +42,13 @@ int aml_layout_dims(const struct aml_layout *layout, ...) return ret; } +int aml_layout_adims(const struct aml_layout *layout, size_t *dims) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->adims(layout->data, dims); +} + /******************************************************************************* * Layout initialization: ******************************************************************************/ @@ -65,14 +79,14 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, struct aml_layout_data *data = layout->data; assert(data->ndims == ndims); assert(data->dims); - assert(data->pitch); assert(data->stride); + assert(data->pitch); data->ptr = ptr; int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); if(type == AML_TYPE_LAYOUT_ROW_ORDER) { AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ROW_ORDER); - layout->ops = &aml_layout_column_ops; + layout->ops = &aml_layout_row_ops; for(size_t i = 0; i < ndims; i++) { data->dims[i] = dims[ndims-i-1]; @@ -80,12 +94,12 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, } data->pitch[0] = element_size; for(size_t i = 1; i < ndims; i++) - data->pitch[i] = data->pitch[i-1]*pitch[ndims-i-1]; + data->pitch[i] = data->pitch[i-1]*pitch[ndims-i]; } else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) { AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); - layout->ops = &aml_layout_row_ops; + layout->ops = &aml_layout_column_ops; 
memcpy(data->dims, dims, ndims * sizeof(size_t)); /* pitches are only necessary for ndims-1 dimensions. Since we * store element size as p->pitch[0], there's still ndims @@ -109,7 +123,7 @@ int aml_layout_vinit(struct aml_layout *p, uint64_t tags, void *ptr, dims[i] = va_arg(ap, size_t); for(size_t i = 0; i < ndims; i++) stride[i] = va_arg(ap, size_t); - for(size_t i = 0; i < ndims-1; i++) + for(size_t i = 0; i < ndims; i++) pitch[i] = va_arg(ap, size_t); return aml_layout_ainit(p, tags, ptr, element_size, ndims, dims, stride, pitch); diff --git a/src/layout_dense.c b/src/layout_dense.c index f4e0a0de..1c8a4c7a 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -19,7 +19,7 @@ void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords) return ptr; } -void *aml_layout_column_aderef(const struct aml_layout_data *d, size_t *coords) +void *aml_layout_column_aderef(const struct aml_layout_data *d, const size_t *coords) { void *ptr; assert(d != NULL); @@ -86,7 +86,7 @@ void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords) return ptr; } -void *aml_layout_row_aderef(const struct aml_layout_data *d, size_t *coords) +void *aml_layout_row_aderef(const struct aml_layout_data *d, const size_t *coords) { void *ptr; assert(d != NULL); diff --git a/tests/layout.c b/tests/layout.c index bab44d71..9f57c4da 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -7,20 +7,31 @@ int main(int argc, char *argv[]) AML_LAYOUT_DECL(b, 5); /* padd the dims to the closest multiple of 2 */ - float memory[4][4][8][12][16]; - size_t dims[5] = {2,3,7,11,13}; - size_t cpitch[5] = {4, 4*4, 4*4*4, 4*4*4*8, 4*4*4*8*12}; - size_t pitch[4] = {4, 4, 8, 12}; - size_t stride[5] = {1,1,1,1,1}; + float memory[16][12][8][8][4]; + size_t cpitch[5] = {4, 4*4, 4*4*8, 4*4*8*8, 4*4*8*8*12}; + size_t dims[5] = {2, 3, 7, 11, 13}; + size_t stride[5] = {1, 2, 1, 1, 1}; + + size_t dims_col[5] = {2, 3, 7, 11, 13}; + size_t dims_row[5] = {13, 11, 7, 3, 2}; + + size_t 
pitch_col[5] = {4, 8, 8, 12, 16}; + size_t pitch_row[5] = {16, 12, 8, 8, 4}; + + size_t stride_col[5] = {1, 2, 1, 1, 1}; + size_t stride_row[5] = {1, 1, 1, 2, 1}; + + for(size_t i = 0; i < 4*8*8*12*16; i++) + ((float*)(&memory[0][0][0][0][0]))[i] = (float)i; /* library initialization */ aml_init(&argc, &argv); - /* initialize the layouts */ + /* initialize column order layouts */ aml_layout_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)memory, - sizeof(float), 5, dims, stride, pitch); + sizeof(float), 5, dims_col, stride_col, pitch_col); aml_layout_ainit(&b, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)memory, - sizeof(float), 5, dims, stride, pitch); + sizeof(float), 5, dims_col, stride_col, pitch_col); assert( (intptr_t)(a->data->stride) - (intptr_t)(a->data->dims) == 5*sizeof(size_t) ); @@ -35,6 +46,71 @@ int main(int argc, char *argv[]) assert(!memcmp(b.data->pitch, cpitch, sizeof(size_t)*5)); assert(!memcmp(b.data->stride, stride, sizeof(size_t)*5)); + /* test column major subroutines */ + size_t dims_res[5]; + size_t coords_test_col[5] = { 1, 2, 3, 4, 5 }; + void *test_addr; + void *res_addr = (void *)&memory[5][4][3][2*2][1]; + + aml_layout_adims(a, dims_res); + assert(!memcmp(dims_res, dims_col, sizeof(size_t)*5)); + aml_layout_dims(a, dims_res, + dims_res + 1, + dims_res + 2, + dims_res + 3, + dims_res + 4); + assert(!memcmp(dims_res, dims_col, sizeof(size_t)*5)); + test_addr = aml_layout_aderef(a, coords_test_col); + assert(res_addr == test_addr); + test_addr = aml_layout_deref(a, coords_test_col[0], + coords_test_col[1], + coords_test_col[2], + coords_test_col[3], + coords_test_col[4]); + assert(res_addr == test_addr); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(a)); + + free(a); + + /* initialize row order layouts */ + aml_layout_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + sizeof(float), 5, dims_row, stride_row, pitch_row); + aml_layout_ainit(&b, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + sizeof(float), 5, dims_row, stride_row, 
pitch_row); + + assert( (intptr_t)(a->data->stride) - (intptr_t)(a->data->dims) + == 5*sizeof(size_t) ); + assert( (intptr_t)(a->data->pitch) - (intptr_t)(a->data->dims) + == 10*sizeof(size_t) ); + + /* some simple checks */ + assert(!memcmp(a->data->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(a->data->pitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(a->data->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(b.data->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(b.data->pitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(b.data->stride, stride, sizeof(size_t)*5)); + + /* test column major subroutines */ + size_t coords_test_row[5] = { 5, 4, 3, 2, 1 }; + aml_layout_adims(a, dims_res); + assert(!memcmp(dims_res, dims_row, sizeof(size_t)*5)); + aml_layout_dims(a, dims_res, + dims_res + 1, + dims_res + 2, + dims_res + 3, + dims_res + 4); + assert(!memcmp(dims_res, dims_row, sizeof(size_t)*5)); + test_addr = aml_layout_aderef(a, coords_test_row); + assert(res_addr == test_addr); + test_addr = aml_layout_deref(a, coords_test_row[0], + coords_test_row[1], + coords_test_row[2], + coords_test_row[3], + coords_test_row[4]); + assert(res_addr == test_addr); + assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(a)); + free(a); aml_finalize(); From b3596fb524200ab3cc4dea74857b4fd7b51d6fdd Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 13 Dec 2018 14:45:36 -0600 Subject: [PATCH 12/47] Store the pitch given by the user. --- src/aml-layout.h | 14 +++++++++----- src/copy.c | 16 ++++++++-------- src/layout.c | 16 ++++++++++------ src/layout_dense.c | 8 ++++---- tests/layout.c | 17 +++++++++++++---- 5 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index 0ffcf252..7f1f4053 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -30,16 +30,19 @@ struct aml_layout_data; * "ndims": number of dimensions * "dims": dimensions, in element size, of the data structure, by order of * appearance in memory.
- * "pitch": cumulative distances between two elements in the same dimension - * (pitch[0] is the element size in bytes). * "stride": offset between elements of the same dimension. + * "pitch": distances between two elements of the next dimension (or total + dimension of the layout in this dimension). + * "cpitch": cumulative distances between two elements in the same dimension + * (pitch[0] is the element size in bytes). */ struct aml_layout_data { void *ptr; size_t ndims; size_t *dims; - size_t *pitch; size_t *stride; + size_t *pitch; + size_t *cpitch; }; struct aml_layout_ops { @@ -58,16 +61,17 @@ struct aml_layout { #define AML_LAYOUT_ALLOCSIZE(ndims) (sizeof(struct aml_layout) +\ sizeof(struct aml_layout_data) +\ - ndims * 3 * sizeof(size_t)) + ndims * 4 * sizeof(size_t)) #define AML_LAYOUT_DECL(name, ndims) \ - size_t __ ##name## _inner_data[ndims * 3]; \ + size_t __ ##name## _inner_data[ndims * 4]; \ struct aml_layout_data __ ##name## _inner_struct = { \ NULL, \ ndims, \ __ ##name## _inner_data, \ __ ##name## _inner_data + ndims, \ __ ##name## _inner_data + 2 * ndims, \ + __ ##name## _inner_data + 3 * ndims, \ }; \ struct aml_layout name = { \ 0, \ diff --git a/src/copy.c b/src/copy.c index bf6ab96b..28bc4848 100644 --- a/src/copy.c +++ b/src/copy.c @@ -488,13 +488,13 @@ int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) size_t d = dsrc->ndims; assert(d > 0); - size_t elem_size = dsrc->pitch[0]; + size_t elem_size = dsrc->cpitch[0]; assert(d == ddst->ndims); - assert(elem_size == ddst->pitch[0]); + assert(elem_size == ddst->cpitch[0]); for (int i = 0; i < d; i++) assert( dsrc->dims[i] == ddst->dims[i] ); - return aml_copy_ndstr_c(d, ddst->ptr, ddst->pitch, ddst->stride, - dsrc->ptr, dsrc->pitch, dsrc->stride, + return aml_copy_ndstr_c(d, ddst->ptr, ddst->cpitch, ddst->stride, + dsrc->ptr, dsrc->cpitch, dsrc->stride, dsrc->dims, elem_size); } @@ -507,13 +507,13 @@ int aml_copy_layout_transform_native(struct aml_layout *dst, 
size_t d = dsrc->ndims; assert(d > 0); - size_t elem_size = dsrc->pitch[0]; + size_t elem_size = dsrc->cpitch[0]; assert(d == ddst->ndims); - assert(elem_size == ddst->pitch[0]); + assert(elem_size == ddst->cpitch[0]); for (int i = 0; i < d; i++) assert( dsrc->dims[target_dims[i]] == ddst->dims[i]); - return aml_copy_shndstr_c(d, target_dims, ddst->ptr, ddst->pitch, - ddst->stride, dsrc->ptr, dsrc->pitch, + return aml_copy_shndstr_c(d, target_dims, ddst->ptr, ddst->cpitch, + ddst->stride, dsrc->ptr, dsrc->cpitch, dsrc->stride, dsrc->dims, elem_size); } diff --git a/src/layout.c b/src/layout.c index 174881e5..92eb82d0 100644 --- a/src/layout.c +++ b/src/layout.c @@ -66,6 +66,7 @@ int aml_layout_struct_init(struct aml_layout *layout, layout->data->dims = (size_t *)memory; layout->data->stride = layout->data->dims + ndims; layout->data->pitch = layout->data->stride + ndims; + layout->data->cpitch = layout->data->pitch + ndims; return 0; } @@ -81,6 +82,7 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, assert(data->dims); assert(data->stride); assert(data->pitch); + assert(data->cpitch); data->ptr = ptr; int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); if(type == AML_TYPE_LAYOUT_ROW_ORDER) @@ -91,24 +93,26 @@ int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, { data->dims[i] = dims[ndims-i-1]; data->stride[i] = stride[ndims-i-1]; + data->pitch[i] = pitch[ndims-i-1]; } - data->pitch[0] = element_size; + data->cpitch[0] = element_size; for(size_t i = 1; i < ndims; i++) - data->pitch[i] = data->pitch[i-1]*pitch[ndims-i]; + data->cpitch[i] = data->cpitch[i-1]*pitch[ndims-i]; } else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) { AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); layout->ops = &aml_layout_column_ops; memcpy(data->dims, dims, ndims * sizeof(size_t)); + memcpy(data->stride, stride, ndims * sizeof(size_t)); + memcpy(data->pitch, pitch, ndims * sizeof(size_t)); /* pitches are only necessary for ndims-1 
dimensions. Since we - * store element size as p->pitch[0], there's still ndims + * store element size as p->cpitch[0], there's still ndims * elements in the array. */ - data->pitch[0] = element_size; + data->cpitch[0] = element_size; for(size_t i = 1; i < ndims; i++) - data->pitch[i] = data->pitch[i-1]*pitch[i-1]; - memcpy(data->stride, stride, ndims * sizeof(size_t)); + data->cpitch[i] = data->cpitch[i-1]*pitch[i-1]; } return 0; } diff --git a/src/layout_dense.c b/src/layout_dense.c index 1c8a4c7a..30178474 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -14,7 +14,7 @@ void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords) { size_t c = va_arg(coords, size_t); assert(c < d->dims[i]); - ptr += c*d->pitch[i]*d->stride[i]; + ptr += c*d->cpitch[i]*d->stride[i]; } return ptr; } @@ -28,7 +28,7 @@ void *aml_layout_column_aderef(const struct aml_layout_data *d, const size_t *co for(size_t i = 0; i < d->ndims; i++) { assert(coords[i] < d->dims[i]); - ptr += coords[i]*d->pitch[i]*d->stride[i]; + ptr += coords[i]*d->cpitch[i]*d->stride[i]; } return ptr; } @@ -81,7 +81,7 @@ void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords) { size_t c = va_arg(coords, size_t); assert(c < d->dims[d->ndims - i - 1]); - ptr += c*d->pitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; + ptr += c*d->cpitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; } return ptr; } @@ -96,7 +96,7 @@ void *aml_layout_row_aderef(const struct aml_layout_data *d, const size_t *coord { size_t c = coords[i]; assert(c < d->dims[d->ndims - i - 1]); - ptr += c*d->pitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; + ptr += c*d->cpitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; } return ptr; } diff --git a/tests/layout.c b/tests/layout.c index 9f57c4da..fd815240 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -8,6 +8,7 @@ int main(int argc, char *argv[]) /* padd the dims to the closest multiple of 2 */ float memory[16][12][8][8][4]; + size_t pitch[5] = 
{4, 8, 8, 12, 16}; size_t cpitch[5] = {4, 4*4, 4*4*8, 4*4*8*8, 4*4*8*8*12}; size_t dims[5] = {2, 3, 7, 11, 13}; size_t stride[5] = {1, 2, 1, 1, 1}; @@ -37,14 +38,18 @@ int main(int argc, char *argv[]) == 5*sizeof(size_t) ); assert( (intptr_t)(a->data->pitch) - (intptr_t)(a->data->dims) == 10*sizeof(size_t) ); + assert( (intptr_t)(a->data->cpitch) - (intptr_t)(a->data->dims) + == 15*sizeof(size_t) ); /* some simple checks */ assert(!memcmp(a->data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(a->data->pitch, cpitch, sizeof(size_t)*5)); assert(!memcmp(a->data->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(a->data->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(a->data->cpitch, cpitch, sizeof(size_t)*5)); assert(!memcmp(b.data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(b.data->pitch, cpitch, sizeof(size_t)*5)); assert(!memcmp(b.data->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(b.data->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(b.data->cpitch, cpitch, sizeof(size_t)*5)); /* test column major subroutines */ size_t dims_res[5]; @@ -82,14 +87,18 @@ int main(int argc, char *argv[]) == 5*sizeof(size_t) ); assert( (intptr_t)(a->data->pitch) - (intptr_t)(a->data->dims) == 10*sizeof(size_t) ); + assert( (intptr_t)(a->data->cpitch) - (intptr_t)(a->data->dims) + == 15*sizeof(size_t) ); /* some simple checks */ assert(!memcmp(a->data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(a->data->pitch, cpitch, sizeof(size_t)*5)); assert(!memcmp(a->data->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(a->data->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(a->data->cpitch, cpitch, sizeof(size_t)*5)); assert(!memcmp(b.data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(b.data->pitch, cpitch, sizeof(size_t)*5)); assert(!memcmp(b.data->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(b.data->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(b.data->cpitch, cpitch, sizeof(size_t)*5)); /* test column major subroutines */ size_t 
coords_test_row[5] = { 5, 4, 3, 2, 1 }; From ae1bdafa0850cdf3b4d46e9a21476eb2b605e09f Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 20 Dec 2018 16:22:43 -0600 Subject: [PATCH 13/47] Replaced copy operators by generated ones and added the generator. --- src/aml-layout.h | 2 + src/copy.c | 595 ++++++++++++++++++++++++----------------------- src/copy.rb | 469 +++++++++++++++++++++++++++++++++++++ 3 files changed, 770 insertions(+), 296 deletions(-) create mode 100644 src/copy.rb diff --git a/src/aml-layout.h b/src/aml-layout.h index 7f1f4053..edbea842 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -1,6 +1,8 @@ #ifndef AML_LAYOUT_H #define AML_LAYOUT_H 1 +#include + /******************************************************************************* * Data Layout Management: ******************************************************************************/ diff --git a/src/copy.c b/src/copy.c index 28bc4848..b0162f0f 100644 --- a/src/copy.c +++ b/src/copy.c @@ -5,101 +5,76 @@ #include static inline void aml_compute_cumulative_pitch(size_t d, - size_t *cumul_dst_pitch, - size_t *cumul_src_pitch, - const size_t *dst_pitch, - const size_t *src_pitch, + size_t * cumul_dst_pitch, + size_t * cumul_src_pitch, + const size_t * dst_pitch, + const size_t * src_pitch, size_t elem_size) { cumul_dst_pitch[0] = elem_size; cumul_src_pitch[0] = elem_size; - for (int i = 0; i < d - 1; i++) { + for (size_t i = 0; i < d - 1; i += 1) { cumul_dst_pitch[i + 1] = dst_pitch[i] * cumul_dst_pitch[i]; cumul_src_pitch[i + 1] = src_pitch[i] * cumul_src_pitch[i]; } } -static inline void aml_copy_2d_helper(void *dst, const size_t *cumul_dst_pitch, +static inline void aml_copy_nd_helper(size_t d, void *dst, + const size_t * cumul_dst_pitch, const void *src, - const size_t *cumul_src_pitch, - const size_t *elem_number, + const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) { - if (cumul_dst_pitch[0] == elem_size && cumul_src_pitch[0] == elem_size) - for (int i = 
0; i < elem_number[1]; i++) { + if (d == 1) { + if (cumul_dst_pitch[0] == elem_size + && cumul_src_pitch[0] == elem_size) { memcpy(dst, src, elem_number[0] * elem_size); - dst = (void *)((uintptr_t) dst + cumul_dst_pitch[1]); - src = (void *)((uintptr_t) src + cumul_src_pitch[1]); - } - else - for (int j = 0; j < elem_number[1]; j++) - for (int i = 0; i < elem_number[0]; i++) - memcpy((void *)((uintptr_t) dst + - i * cumul_dst_pitch[0] + - j * cumul_dst_pitch[1]), - (void *)((uintptr_t) src + - i * cumul_src_pitch[0] + - j * cumul_src_pitch[1]), - elem_size); -} - -static void aml_copy_nd_helper(size_t d, void *dst, - const size_t *cumul_dst_pitch, const void *src, - const size_t *cumul_src_pitch, - const size_t *elem_number, - const size_t elem_size) -{ - if (d == 1) - if (cumul_dst_pitch[0] == elem_size && - cumul_src_pitch[0] == elem_size) - memcpy(dst, src, elem_number[0] * elem_size); - else - for (int i = 0; i < elem_number[0]; i++) - memcpy((void *)((uintptr_t) dst + + } else { + for (size_t i = 0; i < elem_number[0]; i += 1) { + memcpy((void *)((intptr_t) dst + i * cumul_dst_pitch[0]), - (void *)((uintptr_t) src + + (void *)((intptr_t) src + i * cumul_src_pitch[0]), elem_size); - else if (d == 2) - aml_copy_2d_helper(dst, cumul_dst_pitch, src, cumul_src_pitch, - elem_number, elem_size); - else { - for (int i = 0; i < elem_number[d - 1]; i++) { + } + } + } else { + for (size_t i = 0; i < elem_number[d - 1]; i += 1) { aml_copy_nd_helper(d - 1, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); - dst = - (void *)((uintptr_t) dst + cumul_dst_pitch[d - 1]); - src = - (void *)((uintptr_t) src + cumul_src_pitch[d - 1]); + dst = (void *)((intptr_t) dst + cumul_dst_pitch[d - 1]); + src = (void *)((intptr_t) src + cumul_src_pitch[d - 1]); } } } -int aml_copy_nd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, - const void *src, const size_t *cumul_src_pitch, - const size_t *elem_number, size_t elem_size) +int aml_copy_nd_c(size_t d, void *dst, 
const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - for (int i = 0; i < d - 1; i++) { - assert(cumul_dst_pitch[i + 1] >= cumul_dst_pitch[i] * - elem_number[i]); - assert(cumul_src_pitch[i + 1] >= cumul_src_pitch[i] * - elem_number[i]); + for (size_t i = 0; i < d - 1; i += 1) { + assert(cumul_dst_pitch[i + 1] >= + cumul_dst_pitch[i] * elem_number[i]); + assert(cumul_src_pitch[i + 1] >= + cumul_src_pitch[i] * elem_number[i]); } aml_copy_nd_helper(d, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; } -int aml_copy_nd(size_t d, void *dst, const size_t *dst_pitch, const void *src, - const size_t *src_pitch, const size_t *elem_number, +int aml_copy_nd(size_t d, void *dst, const size_t * dst_pitch, const void *src, + const size_t * src_pitch, const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); - size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); - + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, dst_pitch, src_pitch, elem_size); aml_copy_nd_c(d, dst, cumul_dst_pitch, src, cumul_src_pitch, @@ -107,162 +82,158 @@ int aml_copy_nd(size_t d, void *dst, const size_t *dst_pitch, const void *src, return 0; } -static void aml_copy_ndstr_helper(size_t d, void *dst, - const size_t *cumul_dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *cumul_src_pitch, - const size_t *src_stride, - const size_t *elem_number, size_t elem_size) +static inline void aml_copy_ndstr_helper(size_t d, void *dst, + const size_t * cumul_dst_pitch, + const size_t * dst_stride, + const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, + const size_t * elem_number, 
+ size_t elem_size) { - if (d == 1) - for (int i = 0; i < elem_number[0]; i++) - memcpy((void *)((uintptr_t) dst + - i * dst_stride[0] * cumul_dst_pitch[0]), - (void *)((uintptr_t) src + - i * src_stride[0] * cumul_src_pitch[0]), - elem_size); - else { - for (int i = 0; i < elem_number[d - 1]; i++) { + if (d == 1) { + if (dst_stride[0] * cumul_dst_pitch[0] == elem_size + && src_stride[0] * cumul_src_pitch[0] == elem_size) { + memcpy(dst, src, elem_number[0] * elem_size); + } else { + for (size_t i = 0; i < elem_number[0]; i += 1) { + memcpy((void *)((intptr_t) dst + + i * (dst_stride[0] * + cumul_dst_pitch[0])), + (void *)((intptr_t) src + + i * (src_stride[0] * + cumul_src_pitch[0])), + elem_size); + } + } + } else { + for (size_t i = 0; i < elem_number[d - 1]; i += 1) { aml_copy_ndstr_helper(d - 1, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); dst = - (void *)((uintptr_t) dst + - cumul_dst_pitch[d - 1] * dst_stride[d - + (void *)((intptr_t) dst + + dst_stride[d - 1] * cumul_dst_pitch[d - 1]); src = - (void *)((uintptr_t) src + - cumul_src_pitch[d - 1] * src_stride[d - + (void *)((intptr_t) src + + src_stride[d - 1] * cumul_src_pitch[d - 1]); } } } -int aml_copy_ndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *cumul_src_pitch, const size_t *src_stride, - const size_t *elem_number, const size_t elem_size) +int aml_copy_ndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - for (int i = 0; i < d - 1; i++) { + for (size_t i = 0; i < d - 1; i += 1) { assert(cumul_dst_pitch[i + 1] >= - cumul_dst_pitch[i] * elem_number[i] * - dst_stride[i]); + dst_stride[i] * cumul_dst_pitch[i] * elem_number[i]); assert(cumul_src_pitch[i + 1] >= - cumul_src_pitch[i] * elem_number[i] * - 
src_stride[i]); + src_stride[i] * cumul_src_pitch[i] * elem_number[i]); } aml_copy_ndstr_helper(d, dst, cumul_dst_pitch, dst_stride, src, - cumul_src_pitch, src_stride, elem_number, - elem_size); + cumul_src_pitch, src_stride, elem_number, + elem_size); return 0; } -int aml_copy_ndstr(size_t d, void *dst, const size_t *dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *src_pitch, const size_t *src_stride, - const size_t *elem_number, const size_t elem_size) +int aml_copy_ndstr(size_t d, void *dst, const size_t * dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); - size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); - + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, dst_pitch, src_pitch, elem_size); aml_copy_ndstr_c(d, dst, cumul_dst_pitch, dst_stride, src, - cumul_src_pitch, src_stride, elem_number, - elem_size); + cumul_src_pitch, src_stride, elem_number, elem_size); return 0; } -static void aml_copy_sh2d_helper(const size_t *target_dims, void *dst, - const size_t *cumul_dst_pitch, - const void *src, - const size_t *cumul_src_pitch, - const size_t *elem_number, - const size_t elem_size) -{ - for (int j = 0; j < elem_number[target_dims[1]]; j++) - for (int i = 0; i < elem_number[target_dims[0]]; i++) - memcpy((void *)((uintptr_t) dst + - i * cumul_dst_pitch[0] + - j * cumul_dst_pitch[1]), - (void *)((uintptr_t) src + - i * cumul_src_pitch[target_dims[0]] + - j * cumul_src_pitch[target_dims[1]]), - elem_size); -} - -static void aml_copy_shnd_helper(size_t d, const size_t *target_dims, - void *dst, const size_t *cumul_dst_pitch, - const void *src, - const 
size_t *cumul_src_pitch, - const size_t *elem_number, - const size_t elem_size) +static inline void aml_copy_shnd_helper(size_t d, const size_t * target_dims, + void *dst, + const size_t * cumul_dst_pitch, + const void *src, + const size_t * cumul_src_pitch, + const size_t * elem_number, + size_t elem_size) { - if (d == 1) - for (int i = 0; i < elem_number[target_dims[0]]; i++) - memcpy((void *)((uintptr_t) dst + - i * cumul_dst_pitch[0]), - (void *)((uintptr_t) src + - i * cumul_src_pitch[target_dims[0]]), - elem_size); - if (d == 2) - aml_copy_sh2d_helper(target_dims, dst, cumul_dst_pitch, src, - cumul_src_pitch, elem_number, elem_size); - else { - // process dimension d-1 - for (int i = 0; i < elem_number[target_dims[d - 1]]; i++) { + if (d == 1) { + if (cumul_dst_pitch[0] == elem_size + && cumul_src_pitch[target_dims[0]] == elem_size) { + memcpy(dst, src, + elem_number[target_dims[0]] * elem_size); + } else { + for (size_t i = 0; i < elem_number[target_dims[0]]; + i += 1) { + memcpy((void *)((intptr_t) dst + + i * cumul_dst_pitch[0]), + (void *)((intptr_t) src + + i * + cumul_src_pitch[target_dims + [0]]), + elem_size); + } + } + } else { + for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { aml_copy_shnd_helper(d - 1, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); - dst = - (void *)((uintptr_t) dst + - cumul_dst_pitch[d - 1]); + dst = (void *)((intptr_t) dst + cumul_dst_pitch[d - 1]); src = - (void *)((uintptr_t) src + + (void *)((intptr_t) src + cumul_src_pitch[target_dims[d - 1]]); } } } -int aml_copy_shnd_c(size_t d, const size_t *target_dims, void *dst, - const size_t *cumul_dst_pitch, const void *src, - const size_t *cumul_src_pitch, const size_t *elem_number, - const size_t elem_size) +int aml_copy_shnd_c(size_t d, const size_t * target_dims, void *dst, + const size_t * cumul_dst_pitch, const void *src, + const size_t * cumul_src_pitch, const size_t * elem_number, + size_t elem_size) { assert(d > 0); - 
size_t present_dims = 0; - - for (int i = 0; i < d - 1; i++) { + size_t present_dims; + present_dims = 0; + for (size_t i = 0; i < d; i += 1) { assert(target_dims[i] < d); - assert(cumul_src_pitch[i + 1] >= - cumul_src_pitch[i] * elem_number[i]); present_dims |= 1 << target_dims[i]; } - assert(target_dims[d - 1] < d); - present_dims |= 1 << target_dims[d - 1]; - for (int i = 0; i < d; i++) - assert(present_dims & (1 << i)); - for (int i = 0; i < d - 1; i++) + for (size_t i = 0; i < d; i += 1) { + assert(present_dims & 1 << i); + } + for (size_t i = 0; i < d - 1; i += 1) { assert(cumul_dst_pitch[i + 1] >= - cumul_dst_pitch[i] * elem_number[target_dims[i]]); + cumul_dst_pitch[i] * elem_number[target_dims[i]]); + assert(cumul_src_pitch[i + 1] >= + cumul_src_pitch[i] * elem_number[i]); + } aml_copy_shnd_helper(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; } -int aml_copy_shnd(size_t d, const size_t *target_dims, void *dst, - const size_t *dst_pitch, const void *src, - const size_t *src_pitch, const size_t *elem_number, - const size_t elem_size) +int aml_copy_shnd(size_t d, const size_t * target_dims, void *dst, + const size_t * dst_pitch, const void *src, + const size_t * src_pitch, const size_t * elem_number, + size_t elem_size) { assert(d > 0); - size_t *cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); - size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); - + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, dst_pitch, src_pitch, elem_size); aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, @@ -270,211 +241,233 @@ int aml_copy_shnd(size_t d, const size_t *target_dims, void *dst, return 0; } -int aml_copy_tnd(size_t d, void *dst, const size_t *dst_pitch, const void *src, - const size_t *src_pitch, const 
size_t *elem_number, - const size_t elem_size) +int aml_copy_tnd(size_t d, void *dst, const size_t * dst_pitch, const void *src, + const size_t * src_pitch, const size_t * elem_number, + size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) + for (size_t i = 0; i < d - 1; i += 1) { target_dims[i] = i + 1; + } aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, elem_number, elem_size); return 0; } -int aml_copy_tnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, - const void *src, const size_t *cumul_src_pitch, - const size_t *elem_number, const size_t elem_size) +int aml_copy_tnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) + for (size_t i = 0; i < d - 1; i += 1) { target_dims[i] = i + 1; + } aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; } -int aml_copy_rtnd(size_t d, void *dst, const size_t *dst_pitch, - const void *src, const size_t *src_pitch, - const size_t *elem_number, const size_t elem_size) +int aml_copy_rtnd(size_t d, void *dst, const size_t * dst_pitch, + const void *src, const size_t * src_pitch, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (int i = 1; i < d; i++) + for (size_t i = 1; i < d; i += 1) { target_dims[i] = i - 1; + } aml_copy_shnd(d, target_dims, dst, dst_pitch, src, 
src_pitch, elem_number, elem_size); return 0; } -int aml_copy_rtnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, - const void *src, const size_t *cumul_src_pitch, - const size_t *elem_number, const size_t elem_size) +int aml_copy_rtnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (int i = 1; i < d; i++) + for (size_t i = 1; i < d; i += 1) { target_dims[i] = i - 1; + } aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; } -static void aml_copy_shndstr_helper(size_t d, const size_t *target_dims, - void *dst, const size_t *cumul_dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *cumul_src_pitch, - const size_t *src_stride, - const size_t *elem_number, - const size_t elem_size) +static inline void aml_copy_shndstr_helper(size_t d, const size_t * target_dims, + void *dst, + const size_t * cumul_dst_pitch, + const size_t * dst_stride, + const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, + const size_t * elem_number, + size_t elem_size) { - if (d == 1) - for (int i = 0; i < elem_number[target_dims[0]]; i++) - memcpy((void *)((uintptr_t) dst + - i * cumul_dst_pitch[0] * - dst_stride[0]), - (void *)((uintptr_t) src + - i * cumul_src_pitch[target_dims[0]] * - src_stride[target_dims[0]]), - elem_size); - else { - // process dimension d-1 - for (int i = 0; i < elem_number[target_dims[d - 1]]; i++) { + if (d == 1) { + if (dst_stride[0] * cumul_dst_pitch[0] == elem_size + && src_stride[target_dims[0]] * + cumul_src_pitch[target_dims[0]] == elem_size) { + memcpy(dst, src, + elem_number[target_dims[0]] * elem_size); + } else { + for (size_t i = 0; i < 
elem_number[target_dims[0]]; + i += 1) { + memcpy((void *)((intptr_t) dst + + i * (dst_stride[0] * + cumul_dst_pitch[0])), + (void *)((intptr_t) src + + i * + (src_stride[target_dims[0]] * + cumul_src_pitch[target_dims + [0]])), + elem_size); + } + } + } else { + for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { aml_copy_shndstr_helper(d - 1, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); dst = - (void *)((uintptr_t) dst + - cumul_dst_pitch[d - 1] * - dst_stride[d - 1]); + (void *)((intptr_t) dst + + dst_stride[d - 1] * cumul_dst_pitch[d - + 1]); src = - (void *)((uintptr_t) src + - cumul_src_pitch[target_dims[d - 1]] * - src_stride[target_dims[d - 1]]); + (void *)((intptr_t) src + + src_stride[target_dims[d - 1]] * + cumul_src_pitch[target_dims[d - 1]]); } } } -int aml_copy_shndstr_c(size_t d, const size_t *target_dims, void *dst, - const size_t *cumul_dst_pitch, const size_t *dst_stride, - const void *src, const size_t *cumul_src_pitch, - const size_t *src_stride, const size_t *elem_number, - const size_t elem_size) +int aml_copy_shndstr_c(size_t d, const size_t * target_dims, void *dst, + const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, const size_t * elem_number, + size_t elem_size) { assert(d > 0); - size_t present_dims = 0; - - for (int i = 0; i < d - 1; i++) { + size_t present_dims; + present_dims = 0; + for (size_t i = 0; i < d; i += 1) { assert(target_dims[i] < d); - assert(cumul_src_pitch[i + 1] >= - cumul_src_pitch[i] * elem_number[i] * - src_stride[i]); present_dims |= 1 << target_dims[i]; } - assert(target_dims[d - 1] < d); - present_dims |= 1 << target_dims[d - 1]; - for (int i = 0; i < d; i++) - assert(present_dims & (1 << i)); - for (int i = 0; i < d - 1; i++) + for (size_t i = 0; i < d; i += 1) { + assert(present_dims & 1 << i); + } + for (size_t i = 0; i < d - 1; i += 1) { 
assert(cumul_dst_pitch[i + 1] >= - cumul_dst_pitch[i] * elem_number[target_dims[i]] * - dst_stride[i]); + dst_stride[i] * cumul_dst_pitch[i] * + elem_number[target_dims[i]]); + assert(cumul_src_pitch[i + 1] >= + src_stride[i] * cumul_src_pitch[i] * elem_number[i]); + } aml_copy_shndstr_helper(d, target_dims, dst, cumul_dst_pitch, - dst_stride, src, cumul_src_pitch, - src_stride, elem_number, elem_size); + dst_stride, src, cumul_src_pitch, src_stride, + elem_number, elem_size); return 0; } -int aml_copy_shndstr(size_t d, const size_t *target_dims, void *dst, - const size_t *dst_pitch, const size_t *dst_stride, - const void *src, const size_t *src_pitch, - const size_t *src_stride, const size_t *elem_number, - const size_t elem_size) +int aml_copy_shndstr(size_t d, const size_t * target_dims, void *dst, + const size_t * dst_pitch, const size_t * dst_stride, + const void *src, const size_t * src_pitch, + const size_t * src_stride, const size_t * elem_number, + size_t elem_size) { assert(d > 0); - size_t *cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); - size_t *cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); - + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, dst_pitch, src_pitch, elem_size); - aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, - dst_stride, src, cumul_src_pitch, - src_stride, elem_number, elem_size); + aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, src_stride, elem_number, + elem_size); return 0; } -int aml_copy_tndstr(size_t d, void *dst, const size_t *dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *src_pitch, const size_t *src_stride, - const size_t *elem_number, const size_t elem_size) +int aml_copy_tndstr(size_t d, void *dst, const size_t * dst_pitch, + const size_t * 
dst_stride, const void *src, + const size_t * src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) + for (size_t i = 0; i < d - 1; i += 1) { target_dims[i] = i + 1; + } aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, src_pitch, src_stride, elem_number, elem_size); return 0; } -int aml_copy_tndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *cumul_src_pitch, const size_t *src_stride, - const size_t *elem_number, const size_t elem_size) +int aml_copy_tndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) + for (size_t i = 0; i < d - 1; i += 1) { target_dims[i] = i + 1; + } aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); return 0; } -int aml_copy_rtndstr(size_t d, void *dst, const size_t *dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *src_pitch, const size_t *src_stride, - const size_t *elem_number, const size_t elem_size) +int aml_copy_rtndstr(size_t d, void *dst, const size_t * dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = 
(size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (int i = 1; i < d; i++) + for (size_t i = 1; i < d; i += 1) { target_dims[i] = i - 1; + } aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, src_pitch, src_stride, elem_number, elem_size); return 0; } -int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, - const size_t *dst_stride, const void *src, - const size_t *cumul_src_pitch, const size_t *src_stride, - const size_t *elem_number, const size_t elem_size) +int aml_copy_rtndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, const size_t * elem_number, + size_t elem_size) { assert(d > 0); - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); - + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (int i = 1; i < d; i++) + for (size_t i = 1; i < d; i += 1) { target_dims[i] = i - 1; + } aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); @@ -483,16 +476,20 @@ int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) { - struct aml_layout_data *ddst = dst->data; - struct aml_layout_data *dsrc = src->data; - size_t d = dsrc->ndims; + struct aml_layout_data *ddst; + struct aml_layout_data *dsrc; + size_t d; + size_t elem_size; + ddst = dst->data; + dsrc = src->data; + d = dsrc->ndims; assert(d > 0); - - size_t elem_size = dsrc->cpitch[0]; + elem_size = dsrc->cpitch[0]; assert(d == ddst->ndims); assert(elem_size == ddst->cpitch[0]); - for (int i = 0; i < d; i++) - assert( dsrc->dims[i] == ddst->dims[i] ); + for (size_t i = 0; i < d; i += 1) { + assert(dsrc->dims[i] == ddst->dims[i]); + } return aml_copy_ndstr_c(d, ddst->ptr, ddst->cpitch, ddst->stride, dsrc->ptr, 
dsrc->cpitch, dsrc->stride, dsrc->dims, elem_size); @@ -500,18 +497,22 @@ int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) int aml_copy_layout_transform_native(struct aml_layout *dst, const struct aml_layout *src, - const size_t *target_dims) + const size_t * target_dims) { - struct aml_layout_data *ddst = dst->data; - struct aml_layout_data *dsrc = src->data; - size_t d = dsrc->ndims; + struct aml_layout_data *ddst; + struct aml_layout_data *dsrc; + size_t d; + size_t elem_size; + ddst = dst->data; + dsrc = src->data; + d = dsrc->ndims; assert(d > 0); - - size_t elem_size = dsrc->cpitch[0]; + elem_size = dsrc->cpitch[0]; assert(d == ddst->ndims); assert(elem_size == ddst->cpitch[0]); - for (int i = 0; i < d; i++) - assert( dsrc->dims[target_dims[i]] == ddst->dims[i]); + for (size_t i = 0; i < d; i += 1) { + assert(dsrc->dims[target_dims[i]] == ddst->dims[i]); + } return aml_copy_shndstr_c(d, target_dims, ddst->ptr, ddst->cpitch, ddst->stride, dsrc->ptr, dsrc->cpitch, dsrc->stride, dsrc->dims, elem_size); @@ -520,25 +521,27 @@ int aml_copy_layout_transform_native(struct aml_layout *dst, int aml_copy_layout_transpose_native(struct aml_layout *dst, const struct aml_layout *src) { - size_t d = src->data->ndims; - assert(d > 0); - - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + size_t d; + size_t *target_dims; + d = src->data->ndims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (int i = 0; i < d - 1; i++) + for (size_t i = 0; i < d - 1; i += 1) { target_dims[i] = i + 1; + } return aml_copy_layout_transform_native(dst, src, target_dims); } int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, const struct aml_layout *src) { - size_t d = src->data->ndims; - assert(d > 0); - - size_t *target_dims = (size_t *) alloca(d * sizeof(size_t)); + size_t d; + size_t *target_dims; + d = src->data->ndims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 
1; - for (int i = 1; i < d; i++) + for (size_t i = 1; i < d; i += 1) { target_dims[i] = i - 1; + } return aml_copy_layout_transform_native(dst, src, target_dims); } diff --git a/src/copy.rb b/src/copy.rb new file mode 100644 index 00000000..2018b6e7 --- /dev/null +++ b/src/copy.rb @@ -0,0 +1,469 @@ +stdin1, stdout0 = IO.pipe +stdin2, stdout1 = IO.pipe + +pid1 = Process.fork { + stdout0.close + stdin2.close + require 'cast' + + parser = C::Parser::new + parser.type_names << '__builtin_va_list' + cpp = C::Preprocessor::new + cpp.macros['__attribute__(a)'] = '' + cpp.macros['__restrict'] = 'restrict' + cpp.macros['__extension__'] = '' + cpp.macros['__asm__(a)'] = '' + cpp.include_path << './' + + + + preprocessed_sources = cpp.preprocess(< +#include +#include +#include +#include +EOF + + parser.parse(preprocessed_sources) + + ast = parser.parse(stdin1.read) + stdin1.close + + stdout1.puts < +#include +#include +#include +#include + +EOF + stdout1.puts ast + stdout1.close +} + +pid2 = Process.fork { + stdin1.close + stdout0.close + stdout1.close + require 'open3' + Open3.popen3('indent -nbad -bap -nbc -bbo -hnl -br -brs -c33 -cd33 -ncdb -ce -ci4 -cli0 -d0 -di1 -nfc1 -i8 -ip0 -l80 -lp -npcs -nprs -npsl -sai -saf -saw -ncs -nsc -sob -nfca -cp33 -ss -ts8 -il1') do |i, o, t| + i.write stdin2.read + stdin2.close + i.close + puts o.read + end +} + +stdin1.close +stdout1.close +stdin2.close + +require 'BOAST' +include BOAST + +set_array_start(0) +set_lang(C) +set_default_int_size(nil) +set_output(stdout0) + +register_funccall( :alloca ) +register_funccall( :memcpy ) +register_funccall( :assert ) +register_funccall( :sizeof ) + +def name_prefix + "aml_copy_" +end + +def name(suffix = nil, stride: false, shuffle: false) + name = name_prefix + name << "sh" if shuffle + name << "nd" + name << "str" if stride + name << "_#{suffix}" if suffix + name +end + +def transpose_name(reverse: false, stride: false, cumulative: false) + name = name_prefix + name << "r" if reverse + name << 
"tnd" + name << "str" if stride + name << "_c" if cumulative + name +end + +def aml_compute_cumulative_pitch + d = Sizet :d + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(d), dir: :out + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(d), dir: :out + dst_pitch = Sizet :dst_pitch, dim: Dim(d), dir: :in + src_pitch = Sizet :src_pitch, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + i = Sizet :i + p = Procedure( :aml_compute_cumulative_pitch, + [ d, + cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, + elem_size ], + local: true, + inline: true ) { + pr cumul_dst_pitch[0] === elem_size; + pr cumul_src_pitch[0] === elem_size; + + pr For(i, 0, d - 1, operator: '<', declit: true) { + pr cumul_dst_pitch[i + 1] === dst_pitch[i] * cumul_dst_pitch[i] + pr cumul_src_pitch[i + 1] === src_pitch[i] * cumul_src_pitch[i] + } + } +end + +def aml_copy_nd_helper(stride: false, shuffle: false) + d = Sizet :d + target_dims = Sizet :target_dims, dim: Dim(), dir: :in + dst = Pointer :dst, dir: :out + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(), dir: :in + src = Pointer :src, dir: :in + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(), dir: :in + src_stride = Sizet :src_stride, dim: Dim(), dir: :in + elem_number = Sizet :elem_number, dim: Dim(), dir: :in + elem_size = Sizet :elem_size + i = Sizet :i + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, cumul_dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, cumul_src_pitch ] + args += [ src_stride ] if stride + args += [ elem_number, elem_size ] + + effective_dst_pitch = lambda { |d| cumul_dst_pitch[d] } + effective_src_pitch = lambda { |d| cumul_src_pitch[d] } + if stride + tmp_dst = effective_dst_pitch + effective_dst_pitch = lambda { |d| dst_stride[d] * tmp_dst[d] } + tmp_src = effective_src_pitch + effective_src_pitch = lambda { |d| src_stride[d] * tmp_src[d] } + end + + src_index = lambda { |d| d } + 
dst_index = lambda { |d| d } + elem_index = lambda { |d| d } + if shuffle + elem_index = lambda { |d| target_dims[d] } + src_index = lambda { |d| target_dims[d] } + end + + name = name(:helper, stride: stride, shuffle: shuffle) + + p = Procedure( name, + args, + local: true, + inline: true ) { + pr If( d == 1 => lambda { + pr If( And(effective_dst_pitch[dst_index[0]] == elem_size, + effective_src_pitch[src_index[0]] == elem_size) => lambda { + pr memcpy(dst, src, elem_number[elem_index[0]] * elem_size) + }, else: lambda { + pr For( i, 0, elem_number[elem_index[0]], operator: '<', declit: true ) { + pr memcpy( (dst.cast(Intptrt) + i * effective_dst_pitch[dst_index[0]]).cast(dst), + (src.cast(Intptrt) + i * effective_src_pitch[src_index[0]]).cast(src), + elem_size) + } + }) + }, else: lambda { + pr For( i, 0, elem_number[elem_index[d - 1]], operator: '<', declit: true ) { + args[0] = d - 1 + pr p.call(*args) + pr dst === (dst.cast(Intptrt) + effective_dst_pitch[dst_index[d - 1]]).cast(dst) + pr src === (src.cast(Intptrt) + effective_src_pitch[src_index[d - 1]]).cast(src) + } + }) + + } +end + +def aml_copy_nd_c(stride: false, shuffle: false) + d = Sizet :d + target_dims = Sizet :target_dims, dim: Dim(), dir: :in + dst = Pointer :dst, dir: :out + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(d), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(d), dir: :in + src = Pointer :src, dir: :in + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(d), dir: :in + src_stride = Sizet :src_stride, dim: Dim(d), dir: :in + elem_number = Sizet :elem_number, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + i = Sizet :i + present_dims = Sizet :present_dims + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, cumul_dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, cumul_src_pitch] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + effective_dst_pitch = lambda { |d| cumul_dst_pitch[d] } + 
effective_src_pitch = lambda { |d| cumul_src_pitch[d] } + if stride + tmp_dst = effective_dst_pitch + effective_dst_pitch = lambda { |d| dst_stride[d] * tmp_dst[d] } + tmp_src = effective_src_pitch + effective_src_pitch = lambda { |d| src_stride[d] * tmp_src[d] } + end + + elem_index = lambda { |d| d } + if shuffle + elem_index = lambda { |d| target_dims[d] } + end + + name = name(:c, stride: stride, shuffle: shuffle) + + p = Procedure( name, + args, + return_type: Int ) { + pr assert(d > 0) + if shuffle + decl present_dims + pr present_dims === 0 + pr For(i, 0, d, operator: '<', declit: true ) { + pr assert(target_dims[i] < d) + get_output.puts "#{present_dims} |= 1 << #{target_dims[i]};" + } + pr For(i, 0, d, operator: '<', declit: true ) { + pr assert("#{present_dims} & (1 << #{i})") + } + end + pr For(i, 0, d - 1, operator: '<', declit: true ) { + pr assert(cumul_dst_pitch[i + 1] >= effective_dst_pitch[i] * elem_number[elem_index[i]]); + pr assert(cumul_src_pitch[i + 1] >= effective_src_pitch[i] * elem_number[i]); + } + pr aml_copy_nd_helper(stride: stride, shuffle: shuffle).call( *args ) + pr Return(0) + } +end + +def aml_copy_nd(stride: false, shuffle: false) + d = Sizet :d + target_dims = Sizet :target_dims, dim: Dim(d), dir: :in + dst = Pointer :dst, dir: :out + dst_pitch = Sizet :dst_pitch, dim: Dim(d), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(d), dir: :in + src = Pointer :src, dir: :in + src_pitch = Sizet :src_pitch, dim: Dim(d), dir: :in + src_stride = Sizet :src_stride, dim: Dim(d), dir: :in + elem_number = Sizet :elem_number, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + cumul_dst_pitch = Pointer :cumul_dst_pitch, type: Sizet + cumul_src_pitch = Pointer :cumul_src_pitch, type: Sizet + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, src_pitch] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + name = name(stride: 
stride, shuffle: shuffle) + + p = Procedure( name, + args, + return_type: Int ) { + pr assert(d > 0); + decl cumul_dst_pitch, cumul_src_pitch + pr cumul_dst_pitch === alloca(d * sizeof("size_t")).cast(cumul_dst_pitch) + pr cumul_src_pitch === alloca(d * sizeof("size_t")).cast(cumul_src_pitch) + pr $aml_compute_cumulative_pitch.call(d, cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, elem_size); + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, cumul_dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, cumul_src_pitch] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + pr aml_copy_nd_c(stride: stride, shuffle: shuffle).call( *args ) + pr Return(0) + } +end + +def aml_copy_tnd(reverse: false, stride: false, cumulative: false) + d = Sizet :d + dst = Pointer :dst, dir: :out + dst_pitch = Sizet :dst_pitch, dim: Dim(d), dir: :in + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(d), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(d), dir: :in + src = Pointer :src, dir: :in + src_pitch = Sizet :src_pitch, dim: Dim(d), dir: :in + src_stride = Sizet :src_stride, dim: Dim(d), dir: :in + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(d), dir: :in + elem_number = Sizet :elem_number, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + + args = [] + args += [ d, dst ] + args += cumulative ? [ cumul_dst_pitch ] : [ dst_pitch ] + args += [ dst_stride ] if stride + args += [ src ] + args += cumulative ? 
[ cumul_src_pitch ] : [ src_pitch ] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + target_dims = Sizet :target_dims, dim: Dim(d) + i = Sizet :i + + name = transpose_name(reverse: reverse, stride: stride, cumulative: cumulative) + + p = Procedure( name, + args, + return_type: Int ) { + pr assert(d > 0); + decl target_dims + pr target_dims === alloca(d * sizeof("size_t")).cast(target_dims) + if reverse + pr target_dims[0] === d - 1 + pr For(i, 1, d, operator: '<', declit: true) { + pr target_dims[i] === i - 1 + } + else + pr target_dims[d - 1] === 0 + pr For(i, 0, d - 1, operator: '<', declit: true) { + pr target_dims[i] === i + 1 + } + end + + args.insert(1, target_dims) + + if cumulative + pr aml_copy_nd_c(stride: stride, shuffle: true).call(*args) + else + pr aml_copy_nd(stride: stride, shuffle: true).call(*args) + end + pr Return(0) + } +end + +def aml_copy_layout_native(shuffle: false) + dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout + src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in + target_dims = Sizet :target_dims, dim: Dim(), dir: :in + + ddst = Pointer :ddst, type: CStruct::new(type_name: :aml_layout_data, members: {}) + dsrc = Pointer :dsrc, type: CStruct::new(type_name: :aml_layout_data, members: {}) + d = Sizet :d + elem_size = Sizet :elem_size + i = Sizet :i + + src_index = lambda { |d| d } + dst_index = lambda { |d| d } + if shuffle + src_index = lambda { |d| target_dims[d] } + end + + name = name_prefix + "layout_" + name << "transform_" if shuffle + name << "native" + + args = [dst, src] + args += [target_dims] if shuffle + + p = Procedure( name, args, return_type: Int ) { + decl ddst, dsrc + decl d, elem_size + + pr ddst === "#{dst}->data" + pr dsrc === "#{src}->data" + pr d === "#{dsrc}->ndims" + pr assert(d > 0); + + pr elem_size === "#{dsrc}->cpitch[0]" + pr assert(d == "#{ddst}->ndims") + pr assert(elem_size == "#{ddst}->cpitch[0]") + pr 
For(i, 0, d, operator: '<', declit: true) { + pr assert( "#{dsrc}->dims[#{src_index[i]}] == #{ddst}->dims[#{dst_index[i]}]" ) + } + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ "#{ddst}->ptr", "#{ddst}->cpitch", "#{ddst}->stride", + "#{dsrc}->ptr", "#{dsrc}->cpitch", "#{dsrc}->stride", + "#{dsrc}->dims", elem_size ] + pr Return(aml_copy_nd_c(stride: true, shuffle: shuffle).call(*args)) + } +end + +def aml_copy_transpose_native(reverse: false) + dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout + src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in + + target_dims = Sizet :target_dims, dim: Dim() + d = Sizet :d + i = Sizet :i + + name = name_prefix + "layout_" + name << "reverse_" if reverse + name << "transpose_native" + p = Procedure( name, [ dst, src ], return_type: Int ) { + decl d + decl target_dims + + pr d === "#{src}->data->ndims" + pr target_dims === alloca(d * sizeof("size_t")).cast(target_dims) + if reverse + pr target_dims[0] === d - 1 + pr For(i, 1, d, operator: '<', declit: true) { + pr target_dims[i] === i - 1 + } + else + pr target_dims[d - 1] === 0 + pr For(i, 0, d - 1, operator: '<', declit: true) { + pr target_dims[i] === i + 1 + } + end + pr Return( aml_copy_layout_native(shuffle: true).call( dst, src, target_dims) ) + } +end + +pr $aml_compute_cumulative_pitch = aml_compute_cumulative_pitch +pr $aml_copy_nd_helper = aml_copy_nd_helper +pr $aml_copy_nd_c = aml_copy_nd_c +pr $aml_copy_nd = aml_copy_nd +pr $aml_copy_ndstr_helper = aml_copy_nd_helper(stride: true) +pr $aml_copy_ndstr_c = aml_copy_nd_c(stride: true) +pr $aml_copy_ndstr = aml_copy_nd(stride: true) +pr $aml_copy_shnd_helper = aml_copy_nd_helper(shuffle: true) +pr $aml_copy_shnd_c = aml_copy_nd_c(shuffle: true) +pr $aml_copy_shnd = aml_copy_nd(shuffle: true) +pr $aml_copy_tnd = aml_copy_tnd +pr $aml_copy_tnd_c = aml_copy_tnd(cumulative: true) +pr $aml_copy_rtnd = aml_copy_tnd(reverse: 
true) +pr $aml_copy_rtnd_c = aml_copy_tnd(reverse: true, cumulative: true) +pr $aml_copy_shndstr_helper = aml_copy_nd_helper(stride: true, shuffle: true) +pr $aml_copy_shndstr_c = aml_copy_nd_c(stride: true, shuffle: true) +pr $aml_copy_shndstr = aml_copy_nd(stride: true, shuffle: true) +pr $aml_copy_tndstr = aml_copy_tnd(stride: true) +pr $aml_copy_tndstr_c = aml_copy_tnd(stride: true, cumulative: true) +pr $aml_copy_rtndstr = aml_copy_tnd(reverse: true, stride: true) +pr $aml_copy_rtndstr_c = aml_copy_tnd(reverse: true, stride: true, cumulative: true) +pr $aml_copy_layout_native = aml_copy_layout_native +pr $aml_copy_layout_tranform_native = aml_copy_layout_native(shuffle: true) +pr $aml_copy_transpose_native = aml_copy_transpose_native +pr $aml_copy_transpose_reverse_native = aml_copy_transpose_native(reverse: true) + +stdout0.close + +Process.wait(pid1) +Process.wait(pid2) + From 29bbe34ed62075e0b711d4d1287f23b48eb78317 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 20 Dec 2018 16:51:34 -0600 Subject: [PATCH 14/47] Removed useless curly braces also. 
--- src/copy.c | 94 +++++++++++++++++++---------------------------------- src/copy.rb | 6 ++++ 2 files changed, 40 insertions(+), 60 deletions(-) diff --git a/src/copy.c b/src/copy.c index b0162f0f..0c291628 100644 --- a/src/copy.c +++ b/src/copy.c @@ -26,20 +26,18 @@ static inline void aml_copy_nd_helper(size_t d, void *dst, const size_t * elem_number, size_t elem_size) { - if (d == 1) { + if (d == 1) if (cumul_dst_pitch[0] == elem_size - && cumul_src_pitch[0] == elem_size) { + && cumul_src_pitch[0] == elem_size) memcpy(dst, src, elem_number[0] * elem_size); - } else { - for (size_t i = 0; i < elem_number[0]; i += 1) { + else + for (size_t i = 0; i < elem_number[0]; i += 1) memcpy((void *)((intptr_t) dst + i * cumul_dst_pitch[0]), (void *)((intptr_t) src + i * cumul_src_pitch[0]), elem_size); - } - } - } else { + else for (size_t i = 0; i < elem_number[d - 1]; i += 1) { aml_copy_nd_helper(d - 1, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, @@ -47,7 +45,6 @@ static inline void aml_copy_nd_helper(size_t d, void *dst, dst = (void *)((intptr_t) dst + cumul_dst_pitch[d - 1]); src = (void *)((intptr_t) src + cumul_src_pitch[d - 1]); } - } } int aml_copy_nd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, @@ -91,12 +88,12 @@ static inline void aml_copy_ndstr_helper(size_t d, void *dst, const size_t * elem_number, size_t elem_size) { - if (d == 1) { + if (d == 1) if (dst_stride[0] * cumul_dst_pitch[0] == elem_size - && src_stride[0] * cumul_src_pitch[0] == elem_size) { + && src_stride[0] * cumul_src_pitch[0] == elem_size) memcpy(dst, src, elem_number[0] * elem_size); - } else { - for (size_t i = 0; i < elem_number[0]; i += 1) { + else + for (size_t i = 0; i < elem_number[0]; i += 1) memcpy((void *)((intptr_t) dst + i * (dst_stride[0] * cumul_dst_pitch[0])), @@ -104,9 +101,7 @@ static inline void aml_copy_ndstr_helper(size_t d, void *dst, i * (src_stride[0] * cumul_src_pitch[0])), elem_size); - } - } - } else { + else for (size_t i = 0; i < elem_number[d 
- 1]; i += 1) { aml_copy_ndstr_helper(d - 1, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, @@ -121,7 +116,6 @@ static inline void aml_copy_ndstr_helper(size_t d, void *dst, src_stride[d - 1] * cumul_src_pitch[d - 1]); } - } } int aml_copy_ndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, @@ -167,14 +161,14 @@ static inline void aml_copy_shnd_helper(size_t d, const size_t * target_dims, const size_t * elem_number, size_t elem_size) { - if (d == 1) { + if (d == 1) if (cumul_dst_pitch[0] == elem_size - && cumul_src_pitch[target_dims[0]] == elem_size) { + && cumul_src_pitch[target_dims[0]] == elem_size) memcpy(dst, src, elem_number[target_dims[0]] * elem_size); - } else { + else for (size_t i = 0; i < elem_number[target_dims[0]]; - i += 1) { + i += 1) memcpy((void *)((intptr_t) dst + i * cumul_dst_pitch[0]), (void *)((intptr_t) src + @@ -182,9 +176,7 @@ static inline void aml_copy_shnd_helper(size_t d, const size_t * target_dims, cumul_src_pitch[target_dims [0]]), elem_size); - } - } - } else { + else for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { aml_copy_shnd_helper(d - 1, target_dims, dst, cumul_dst_pitch, src, @@ -195,7 +187,6 @@ static inline void aml_copy_shnd_helper(size_t d, const size_t * target_dims, (void *)((intptr_t) src + cumul_src_pitch[target_dims[d - 1]]); } - } } int aml_copy_shnd_c(size_t d, const size_t * target_dims, void *dst, @@ -210,9 +201,8 @@ int aml_copy_shnd_c(size_t d, const size_t * target_dims, void *dst, assert(target_dims[i] < d); present_dims |= 1 << target_dims[i]; } - for (size_t i = 0; i < d; i += 1) { + for (size_t i = 0; i < d; i += 1) assert(present_dims & 1 << i); - } for (size_t i = 0; i < d - 1; i += 1) { assert(cumul_dst_pitch[i + 1] >= cumul_dst_pitch[i] * elem_number[target_dims[i]]); @@ -249,9 +239,8 @@ int aml_copy_tnd(size_t d, void *dst, const size_t * dst_pitch, const void *src, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - 
for (size_t i = 0; i < d - 1; i += 1) { + for (size_t i = 0; i < d - 1; i += 1) target_dims[i] = i + 1; - } aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, elem_number, elem_size); return 0; @@ -265,9 +254,8 @@ int aml_copy_tnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (size_t i = 0; i < d - 1; i += 1) { + for (size_t i = 0; i < d - 1; i += 1) target_dims[i] = i + 1; - } aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; @@ -281,9 +269,8 @@ int aml_copy_rtnd(size_t d, void *dst, const size_t * dst_pitch, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (size_t i = 1; i < d; i += 1) { + for (size_t i = 1; i < d; i += 1) target_dims[i] = i - 1; - } aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, elem_number, elem_size); return 0; @@ -297,9 +284,8 @@ int aml_copy_rtnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (size_t i = 1; i < d; i += 1) { + for (size_t i = 1; i < d; i += 1) target_dims[i] = i - 1; - } aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, cumul_src_pitch, elem_number, elem_size); return 0; @@ -315,15 +301,15 @@ static inline void aml_copy_shndstr_helper(size_t d, const size_t * target_dims, const size_t * elem_number, size_t elem_size) { - if (d == 1) { + if (d == 1) if (dst_stride[0] * cumul_dst_pitch[0] == elem_size && src_stride[target_dims[0]] * - cumul_src_pitch[target_dims[0]] == elem_size) { + cumul_src_pitch[target_dims[0]] == elem_size) memcpy(dst, src, elem_number[target_dims[0]] * elem_size); - } else { + else for (size_t i = 0; i < elem_number[target_dims[0]]; - i += 1) { + i += 1) memcpy((void *)((intptr_t) dst + i * (dst_stride[0] * cumul_dst_pitch[0])), @@ 
-333,9 +319,7 @@ static inline void aml_copy_shndstr_helper(size_t d, const size_t * target_dims, cumul_src_pitch[target_dims [0]])), elem_size); - } - } - } else { + else for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { aml_copy_shndstr_helper(d - 1, target_dims, dst, cumul_dst_pitch, dst_stride, @@ -351,7 +335,6 @@ static inline void aml_copy_shndstr_helper(size_t d, const size_t * target_dims, src_stride[target_dims[d - 1]] * cumul_src_pitch[target_dims[d - 1]]); } - } } int aml_copy_shndstr_c(size_t d, const size_t * target_dims, void *dst, @@ -368,9 +351,8 @@ int aml_copy_shndstr_c(size_t d, const size_t * target_dims, void *dst, assert(target_dims[i] < d); present_dims |= 1 << target_dims[i]; } - for (size_t i = 0; i < d; i += 1) { + for (size_t i = 0; i < d; i += 1) assert(present_dims & 1 << i); - } for (size_t i = 0; i < d - 1; i += 1) { assert(cumul_dst_pitch[i + 1] >= dst_stride[i] * cumul_dst_pitch[i] * @@ -412,9 +394,8 @@ int aml_copy_tndstr(size_t d, void *dst, const size_t * dst_pitch, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (size_t i = 0; i < d - 1; i += 1) { + for (size_t i = 0; i < d - 1; i += 1) target_dims[i] = i + 1; - } aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, src_pitch, src_stride, elem_number, elem_size); return 0; @@ -429,9 +410,8 @@ int aml_copy_tndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (size_t i = 0; i < d - 1; i += 1) { + for (size_t i = 0; i < d - 1; i += 1) target_dims[i] = i + 1; - } aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); @@ -447,9 +427,8 @@ int aml_copy_rtndstr(size_t d, void *dst, const size_t * dst_pitch, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (size_t i = 
1; i < d; i += 1) { + for (size_t i = 1; i < d; i += 1) target_dims[i] = i - 1; - } aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, src_pitch, src_stride, elem_number, elem_size); return 0; @@ -465,9 +444,8 @@ int aml_copy_rtndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, size_t *target_dims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (size_t i = 1; i < d; i += 1) { + for (size_t i = 1; i < d; i += 1) target_dims[i] = i - 1; - } aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, src, cumul_src_pitch, src_stride, elem_number, elem_size); @@ -487,9 +465,8 @@ int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) elem_size = dsrc->cpitch[0]; assert(d == ddst->ndims); assert(elem_size == ddst->cpitch[0]); - for (size_t i = 0; i < d; i += 1) { + for (size_t i = 0; i < d; i += 1) assert(dsrc->dims[i] == ddst->dims[i]); - } return aml_copy_ndstr_c(d, ddst->ptr, ddst->cpitch, ddst->stride, dsrc->ptr, dsrc->cpitch, dsrc->stride, dsrc->dims, elem_size); @@ -510,9 +487,8 @@ int aml_copy_layout_transform_native(struct aml_layout *dst, elem_size = dsrc->cpitch[0]; assert(d == ddst->ndims); assert(elem_size == ddst->cpitch[0]); - for (size_t i = 0; i < d; i += 1) { + for (size_t i = 0; i < d; i += 1) assert(dsrc->dims[target_dims[i]] == ddst->dims[i]); - } return aml_copy_shndstr_c(d, target_dims, ddst->ptr, ddst->cpitch, ddst->stride, dsrc->ptr, dsrc->cpitch, dsrc->stride, dsrc->dims, elem_size); @@ -526,9 +502,8 @@ int aml_copy_layout_transpose_native(struct aml_layout *dst, d = src->data->ndims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; - for (size_t i = 0; i < d - 1; i += 1) { + for (size_t i = 0; i < d - 1; i += 1) target_dims[i] = i + 1; - } return aml_copy_layout_transform_native(dst, src, target_dims); } @@ -540,8 +515,7 @@ int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, d = src->data->ndims; 
target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; - for (size_t i = 1; i < d; i += 1) { + for (size_t i = 1; i < d; i += 1) target_dims[i] = i - 1; - } return aml_copy_layout_transform_native(dst, src, target_dims); } diff --git a/src/copy.rb b/src/copy.rb index 2018b6e7..efba6f02 100644 --- a/src/copy.rb +++ b/src/copy.rb @@ -30,6 +30,12 @@ ast = parser.parse(stdin1.read) stdin1.close + ast.postorder { |n| + n.stmt = n.stmt.stmts.first if n.For? && n.stmt.Block? && n.stmt.stmts.size == 1 + n.then = n.then.stmts.first if n.If? && n.then.Block? && n.then.stmts.size == 1 + n.else = n.else.stmts.first if n.If? && n.else && n.else.Block? && n.else.stmts.size == 1 + } + stdout1.puts < #include From 18906d99a16eac5aaabe233e1057f1fb15c4593e Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 4 Jan 2019 14:59:29 -0600 Subject: [PATCH 15/47] Fix naming inconsistencies. --- src/copy.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/copy.rb b/src/copy.rb index efba6f02..846ed48e 100644 --- a/src/copy.rb +++ b/src/copy.rb @@ -410,7 +410,7 @@ def aml_copy_layout_native(shuffle: false) } end -def aml_copy_transpose_native(reverse: false) +def aml_copy_layout_transpose_native(reverse: false) dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in @@ -465,8 +465,8 @@ def aml_copy_transpose_native(reverse: false) pr $aml_copy_rtndstr_c = aml_copy_tnd(reverse: true, stride: true, cumulative: true) pr $aml_copy_layout_native = aml_copy_layout_native pr $aml_copy_layout_tranform_native = aml_copy_layout_native(shuffle: true) -pr $aml_copy_transpose_native = aml_copy_transpose_native -pr $aml_copy_transpose_reverse_native = aml_copy_transpose_native(reverse: true) +pr $aml_copy_layout_transpose_native = aml_copy_layout_transpose_native +pr $aml_copy_layout_transpose_reverse_native = 
aml_copy_layout_transpose_native(reverse: true) stdout0.close From f9c8c2d94d88dc0a865e377cf59482af82f3344e Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 4 Jan 2019 15:13:51 -0600 Subject: [PATCH 16/47] Automatic enumeration of design space. --- src/copy.c | 120 ++++++++++++++++++++++++++-------------------------- src/copy.rb | 54 ++++++++++++----------- 2 files changed, 90 insertions(+), 84 deletions(-) diff --git a/src/copy.c b/src/copy.c index 0c291628..6e2504be 100644 --- a/src/copy.c +++ b/src/copy.c @@ -231,66 +231,6 @@ int aml_copy_shnd(size_t d, const size_t * target_dims, void *dst, return 0; } -int aml_copy_tnd(size_t d, void *dst, const size_t * dst_pitch, const void *src, - const size_t * src_pitch, const size_t * elem_number, - size_t elem_size) -{ - assert(d > 0); - size_t *target_dims; - target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[d - 1] = 0; - for (size_t i = 0; i < d - 1; i += 1) - target_dims[i] = i + 1; - aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, - elem_number, elem_size); - return 0; -} - -int aml_copy_tnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, - const void *src, const size_t * cumul_src_pitch, - const size_t * elem_number, size_t elem_size) -{ - assert(d > 0); - size_t *target_dims; - target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[d - 1] = 0; - for (size_t i = 0; i < d - 1; i += 1) - target_dims[i] = i + 1; - aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, - cumul_src_pitch, elem_number, elem_size); - return 0; -} - -int aml_copy_rtnd(size_t d, void *dst, const size_t * dst_pitch, - const void *src, const size_t * src_pitch, - const size_t * elem_number, size_t elem_size) -{ - assert(d > 0); - size_t *target_dims; - target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[0] = d - 1; - for (size_t i = 1; i < d; i += 1) - target_dims[i] = i - 1; - aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, - elem_number, elem_size); - 
return 0; -} - -int aml_copy_rtnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, - const void *src, const size_t * cumul_src_pitch, - const size_t * elem_number, size_t elem_size) -{ - assert(d > 0); - size_t *target_dims; - target_dims = (size_t *) alloca(d * sizeof(size_t)); - target_dims[0] = d - 1; - for (size_t i = 1; i < d; i += 1) - target_dims[i] = i - 1; - aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, - cumul_src_pitch, elem_number, elem_size); - return 0; -} - static inline void aml_copy_shndstr_helper(size_t d, const size_t * target_dims, void *dst, const size_t * cumul_dst_pitch, @@ -385,6 +325,66 @@ int aml_copy_shndstr(size_t d, const size_t * target_dims, void *dst, return 0; } +int aml_copy_tnd(size_t d, void *dst, const size_t * dst_pitch, const void *src, + const size_t * src_pitch, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, + elem_number, elem_size); + return 0; +} + +int aml_copy_tnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, elem_size); + return 0; +} + +int aml_copy_rtnd(size_t d, void *dst, const size_t * dst_pitch, + const void *src, const size_t * src_pitch, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] 
= i - 1; + aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, + elem_number, elem_size); + return 0; +} + +int aml_copy_rtnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, elem_size); + return 0; +} + int aml_copy_tndstr(size_t d, void *dst, const size_t * dst_pitch, const size_t * dst_stride, const void *src, const size_t * src_pitch, const size_t * src_stride, diff --git a/src/copy.rb b/src/copy.rb index 846ed48e..5b7b8317 100644 --- a/src/copy.rb +++ b/src/copy.rb @@ -443,30 +443,36 @@ def aml_copy_layout_transpose_native(reverse: false) end pr $aml_compute_cumulative_pitch = aml_compute_cumulative_pitch -pr $aml_copy_nd_helper = aml_copy_nd_helper -pr $aml_copy_nd_c = aml_copy_nd_c -pr $aml_copy_nd = aml_copy_nd -pr $aml_copy_ndstr_helper = aml_copy_nd_helper(stride: true) -pr $aml_copy_ndstr_c = aml_copy_nd_c(stride: true) -pr $aml_copy_ndstr = aml_copy_nd(stride: true) -pr $aml_copy_shnd_helper = aml_copy_nd_helper(shuffle: true) -pr $aml_copy_shnd_c = aml_copy_nd_c(shuffle: true) -pr $aml_copy_shnd = aml_copy_nd(shuffle: true) -pr $aml_copy_tnd = aml_copy_tnd -pr $aml_copy_tnd_c = aml_copy_tnd(cumulative: true) -pr $aml_copy_rtnd = aml_copy_tnd(reverse: true) -pr $aml_copy_rtnd_c = aml_copy_tnd(reverse: true, cumulative: true) -pr $aml_copy_shndstr_helper = aml_copy_nd_helper(stride: true, shuffle: true) -pr $aml_copy_shndstr_c = aml_copy_nd_c(stride: true, shuffle: true) -pr $aml_copy_shndstr = aml_copy_nd(stride: true, shuffle: true) -pr $aml_copy_tndstr = aml_copy_tnd(stride: true) -pr $aml_copy_tndstr_c = aml_copy_tnd(stride: true, cumulative: true) -pr 
$aml_copy_rtndstr = aml_copy_tnd(reverse: true, stride: true) -pr $aml_copy_rtndstr_c = aml_copy_tnd(reverse: true, stride: true, cumulative: true) -pr $aml_copy_layout_native = aml_copy_layout_native -pr $aml_copy_layout_tranform_native = aml_copy_layout_native(shuffle: true) -pr $aml_copy_layout_transpose_native = aml_copy_layout_transpose_native -pr $aml_copy_layout_transpose_reverse_native = aml_copy_layout_transpose_native(reverse: true) + +generation_space = BruteForceOptimizer::new( + OptimizationSpace::new( + shuffle: [false, true], + stride: [false, true] + ) +) + +transpose_generation_space = BruteForceOptimizer::new( + OptimizationSpace::new( + stride: [false, true], + reverse: [false, true], + cumulative: [false, true] + ) +) + +generation_space.each { |params| + pr aml_copy_nd_helper(**params) + pr aml_copy_nd_c(**params) + pr aml_copy_nd(**params) +} + +transpose_generation_space.each { |params| + pr aml_copy_tnd(**params) +} + +pr aml_copy_layout_native +pr aml_copy_layout_native(shuffle: true) +pr aml_copy_layout_transpose_native +pr aml_copy_layout_transpose_native(reverse: true) stdout0.close From fea8a1e8a453b2d79196465de18c35aad4286fde Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 4 Jan 2019 16:30:11 -0600 Subject: [PATCH 17/47] Starting adding support for generic copy operators of layout. 
--- src/copy.rb | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/copy.rb b/src/copy.rb index 5b7b8317..57b22a44 100644 --- a/src/copy.rb +++ b/src/copy.rb @@ -145,7 +145,7 @@ def aml_copy_nd_helper(stride: false, shuffle: false) args += [ dst_stride ] if stride args += [ src, cumul_src_pitch ] args += [ src_stride ] if stride - args += [ elem_number, elem_size ] + args += [ elem_number, elem_size ] effective_dst_pitch = lambda { |d| cumul_dst_pitch[d] } effective_src_pitch = lambda { |d| cumul_src_pitch[d] } @@ -214,7 +214,7 @@ def aml_copy_nd_c(stride: false, shuffle: false) args += [ dst_stride ] if stride args += [ src, cumul_src_pitch] args += [ src_stride ] if stride - args += [ elem_number, elem_size] + args += [ elem_number, elem_size] effective_dst_pitch = lambda { |d| cumul_dst_pitch[d] } effective_src_pitch = lambda { |d| cumul_src_pitch[d] } @@ -277,7 +277,7 @@ def aml_copy_nd(stride: false, shuffle: false) args += [ dst_stride ] if stride args += [ src, src_pitch] args += [ src_stride ] if stride - args += [ elem_number, elem_size] + args += [ elem_number, elem_size] name = name(stride: stride, shuffle: shuffle) @@ -324,7 +324,7 @@ def aml_copy_tnd(reverse: false, stride: false, cumulative: false) args += [ src ] args += cumulative ? 
[ cumul_src_pitch ] : [ src_pitch ] args += [ src_stride ] if stride - args += [ elem_number, elem_size] + args += [ elem_number, elem_size] target_dims = Sizet :target_dims, dim: Dim(d) i = Sizet :i @@ -360,7 +360,7 @@ def aml_copy_tnd(reverse: false, stride: false, cumulative: false) } end -def aml_copy_layout_native(shuffle: false) +def aml_copy_layout(native: true, shuffle: false) dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in target_dims = Sizet :target_dims, dim: Dim(), dir: :in @@ -379,7 +379,7 @@ def aml_copy_layout_native(shuffle: false) name = name_prefix + "layout_" name << "transform_" if shuffle - name << "native" + name << (native ? "native" : "generic") args = [dst, src] args += [target_dims] if shuffle @@ -410,7 +410,7 @@ def aml_copy_layout_native(shuffle: false) } end -def aml_copy_layout_transpose_native(reverse: false) +def aml_copy_layout_transpose(native: true, reverse: false) dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in @@ -420,7 +420,8 @@ def aml_copy_layout_transpose_native(reverse: false) name = name_prefix + "layout_" name << "reverse_" if reverse - name << "transpose_native" + name << "transpose_" + name << (native ? 
"native" : "generic") p = Procedure( name, [ dst, src ], return_type: Int ) { decl d decl target_dims @@ -438,7 +439,7 @@ def aml_copy_layout_transpose_native(reverse: false) pr target_dims[i] === i + 1 } end - pr Return( aml_copy_layout_native(shuffle: true).call( dst, src, target_dims) ) + pr Return( aml_copy_layout(native: true, shuffle: true).call( dst, src, target_dims) ) } end @@ -469,10 +470,10 @@ def aml_copy_layout_transpose_native(reverse: false) pr aml_copy_tnd(**params) } -pr aml_copy_layout_native -pr aml_copy_layout_native(shuffle: true) -pr aml_copy_layout_transpose_native -pr aml_copy_layout_transpose_native(reverse: true) +pr aml_copy_layout +pr aml_copy_layout(shuffle: true) +pr aml_copy_layout_transpose +pr aml_copy_layout_transpose(reverse: true) stdout0.close From 35420d1da3f696b0f4a13b4236e5eb37e1e3c40a Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 7 Jan 2019 11:22:54 -0600 Subject: [PATCH 18/47] Added ndims and element_size methods to layouts. --- src/aml-layout.h | 8 ++++++++ src/layout.c | 14 ++++++++++++++ src/layout_dense.c | 24 ++++++++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/src/aml-layout.h b/src/aml-layout.h index edbea842..3b67047b 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -53,6 +53,8 @@ struct aml_layout_ops { int (*order)(const struct aml_layout_data *); int (*dims)(const struct aml_layout_data *, va_list dim_ptrs); int (*adims)(const struct aml_layout_data *, size_t *dims); + size_t (*ndims)(const struct aml_layout_data *); + size_t (*element_size)(const struct aml_layout_data *); }; struct aml_layout { @@ -104,6 +106,8 @@ void *aml_layout_aderef(const struct aml_layout *l, const size_t *coords); int aml_layout_order(const struct aml_layout *l); int aml_layout_dims(const struct aml_layout *l, ...); int aml_layout_adims(const struct aml_layout *l, size_t *dims); +size_t aml_layout_ndims(const struct aml_layout *l); +size_t aml_layout_element_size(const struct aml_layout *l); 
/******************************************************************************* * Dense Layout Operators. @@ -115,6 +119,8 @@ void *aml_layout_column_aderef(const struct aml_layout_data *d, int aml_layout_column_order(const struct aml_layout_data *d); int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims); int aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims); +size_t aml_layout_column_ndims(const struct aml_layout_data *d); +size_t aml_layout_column_element_size(const struct aml_layout_data *d); extern struct aml_layout_ops aml_layout_column_ops; @@ -124,6 +130,8 @@ void *aml_layout_row_aderef(const struct aml_layout_data *d, int aml_layout_row_order(const struct aml_layout_data *d); int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims); int aml_layout_row_adims(const struct aml_layout_data *d, size_t *dims); +size_t aml_layout_row_ndims(const struct aml_layout_data *d); +size_t aml_layout_row_element_size(const struct aml_layout_data *d); extern struct aml_layout_ops aml_layout_row_ops; #endif diff --git a/src/layout.c b/src/layout.c index 92eb82d0..a8b8a70c 100644 --- a/src/layout.c +++ b/src/layout.c @@ -49,6 +49,20 @@ int aml_layout_adims(const struct aml_layout *layout, size_t *dims) return layout->ops->adims(layout->data, dims); } +size_t aml_layout_ndims(const struct aml_layout *layout) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->ndims(layout->data); +} + +size_t aml_layout_element_size(const struct aml_layout *layout) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->element_size(layout->data); +} + /******************************************************************************* * Layout initialization: ******************************************************************************/ diff --git a/src/layout_dense.c b/src/layout_dense.c index 30178474..48128ed8 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -58,12 +58,24 @@
int aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims) return 0; } +size_t aml_layout_column_ndims(const struct aml_layout_data *d) +{ + return d->ndims; +} + +size_t aml_layout_column_element_size(const struct aml_layout_data *d) +{ + return d->cpitch[0]; +} + struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_deref, aml_layout_column_aderef, aml_layout_column_order, aml_layout_column_dims, aml_layout_column_adims, + aml_layout_column_ndims, + aml_layout_column_element_size }; @@ -128,11 +140,23 @@ int aml_layout_row_adims(const struct aml_layout_data *d, size_t *dims) return 0; } +size_t aml_layout_row_ndims(const struct aml_layout_data *d) +{ + return d->ndims; +} + +size_t aml_layout_row_element_size(const struct aml_layout_data *d) +{ + return d->cpitch[0]; +} + struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_deref, aml_layout_row_aderef, aml_layout_row_order, aml_layout_row_dims, aml_layout_row_adims, + aml_layout_row_ndims, + aml_layout_row_element_size }; From 5033fe9545955818e772ac88bdc5880a18f1e63c Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 7 Jan 2019 11:24:38 -0600 Subject: [PATCH 19/47] Added generic copy operators. 
--- src/aml-copy.h | 10 +++- src/copy.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++-- src/copy.rb | 123 +++++++++++++++++++++++++++++++++++++-------- tests/copy.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 370 insertions(+), 25 deletions(-) diff --git a/src/aml-copy.h b/src/aml-copy.h index d7877350..22dd8933 100644 --- a/src/aml-copy.h +++ b/src/aml-copy.h @@ -177,8 +177,16 @@ int aml_copy_layout_native(struct aml_layout *dst, int aml_copy_layout_transform_native(struct aml_layout *dst, const struct aml_layout *src, const size_t *target_dims); +int aml_copy_layout_generic(struct aml_layout *dst, + const struct aml_layout *src); +int aml_copy_layout_transform_generic(struct aml_layout *dst, + const struct aml_layout *src, + const size_t *target_dims); int aml_copy_layout_transpose_native(struct aml_layout *dst, const struct aml_layout *src); int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, - const struct aml_layout *src); + const struct aml_layout *src); +int aml_copy_layout_transpose_generic(struct aml_layout *dst, const struct aml_layout *src); +int aml_copy_layout_reverse_transpose_generic(struct aml_layout *dst, + const struct aml_layout *src); #endif diff --git a/src/copy.c b/src/copy.c index 6e2504be..ee2eead1 100644 --- a/src/copy.c +++ b/src/copy.c @@ -454,10 +454,10 @@ int aml_copy_rtndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) { - struct aml_layout_data *ddst; - struct aml_layout_data *dsrc; size_t d; size_t elem_size; + struct aml_layout_data *ddst; + struct aml_layout_data *dsrc; ddst = dst->data; dsrc = src->data; d = dsrc->ndims; @@ -476,10 +476,10 @@ int aml_copy_layout_transform_native(struct aml_layout *dst, const struct aml_layout *src, const size_t * target_dims) { - struct aml_layout_data *ddst; - struct aml_layout_data *dsrc; size_t d; size_t elem_size; + struct aml_layout_data *ddst; + 
struct aml_layout_data *dsrc; ddst = dst->data; dsrc = src->data; d = dsrc->ndims; @@ -519,3 +519,127 @@ int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, target_dims[i] = i - 1; return aml_copy_layout_transform_native(dst, src, target_dims); } + +static inline void aml_copy_layout_generic_helper(size_t d, + struct aml_layout *dst, + const struct aml_layout *src, + const size_t * elem_number, + size_t elem_size, + size_t * coords) +{ + if (d == 1) + for (size_t i = 0; i < elem_number[0]; i += 1) { + coords[0] = i; + coords[0] = i; + memcpy(aml_layout_aderef(dst, coords), + aml_layout_aderef(src, coords), elem_size); + } else + for (size_t i = 0; i < elem_number[d - 1]; i += 1) { + coords[d - 1] = i; + coords[d - 1] = i; + aml_copy_layout_generic_helper(d - 1, dst, src, + elem_number, elem_size, + coords); + } +} + +static inline void aml_copy_layout_transform_generic_helper(size_t d, + struct aml_layout + *dst, + const struct + aml_layout *src, + const size_t * + elem_number, + size_t elem_size, + size_t * coords, + size_t * coords_out, + const size_t * + target_dims) +{ + if (d == 1) + for (size_t i = 0; i < elem_number[target_dims[0]]; i += 1) { + coords_out[0] = i; + coords[target_dims[0]] = i; + memcpy(aml_layout_aderef(dst, coords_out), + aml_layout_aderef(src, coords), elem_size); + } else + for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { + coords_out[d - 1] = i; + coords[target_dims[d - 1]] = i; + aml_copy_layout_transform_generic_helper(d - 1, dst, + src, + elem_number, + elem_size, + coords, + coords_out, + target_dims); + } +} + +int aml_copy_layout_generic(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d; + size_t elem_size; + size_t *coords; + size_t *elem_number; + assert(aml_layout_ndims(dst) == aml_layout_ndims(src)); + d = aml_layout_ndims(dst); + assert(aml_layout_element_size(dst) == aml_layout_element_size(src)); + elem_size = aml_layout_element_size(dst); + coords = (size_t *) alloca(d * 
sizeof(size_t)); + elem_number = (size_t *) alloca(d * sizeof(size_t)); + aml_layout_adims(src, elem_number); + aml_copy_layout_generic_helper(d, dst, src, elem_number, elem_size, + coords); + return 0; +} + +int aml_copy_layout_transform_generic(struct aml_layout *dst, + const struct aml_layout *src, + const size_t * target_dims) +{ + size_t d; + size_t elem_size; + size_t *coords; + size_t *coords_out; + size_t *elem_number; + assert(aml_layout_ndims(dst) == aml_layout_ndims(src)); + d = aml_layout_ndims(dst); + assert(aml_layout_element_size(dst) == aml_layout_element_size(src)); + elem_size = aml_layout_element_size(dst); + coords = (size_t *) alloca(d * sizeof(size_t)); + coords_out = (size_t *) alloca(d * sizeof(size_t)); + elem_number = (size_t *) alloca(d * sizeof(size_t)); + aml_layout_adims(src, elem_number); + aml_copy_layout_transform_generic_helper(d, dst, src, elem_number, + elem_size, coords, coords_out, + target_dims); + return 0; +} + +int aml_copy_layout_transpose_generic(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d; + size_t *target_dims; + d = src->data->ndims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + return aml_copy_layout_transform_generic(dst, src, target_dims); +} + +int aml_copy_layout_reverse_transpose_generic(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d; + size_t *target_dims; + d = src->data->ndims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + return aml_copy_layout_transform_generic(dst, src, target_dims); +} diff --git a/src/copy.rb b/src/copy.rb index 57b22a44..f2c8fc2f 100644 --- a/src/copy.rb +++ b/src/copy.rb @@ -360,6 +360,57 @@ def aml_copy_tnd(reverse: false, stride: false, cumulative: false) } end +def aml_copy_layout_generic_helper(shuffle: false) + d = Sizet :d + dst = 
Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout + src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in + elem_number = Sizet :elem_number, dim: Dim(), dir: :in + elem_size = Sizet :elem_size + coords = Sizet :coords, dim: Dim(), dir: :inout + coords_out = Sizet :coords_out, dim: Dim(), dir: :inout + target_dims = Sizet :target_dims, dim: Dim(), dir: :in + + i = Sizet :i + + name = name_prefix + "layout_" + name << "transform_" if shuffle + name << "generic_helper" + + args = [d, dst, src, elem_number, elem_size, coords] + args << coords_out << target_dims if shuffle + + src_index = lambda { |d| d } + dst_index = lambda { |d| d } + elem_index = lambda { |d| d } + if shuffle + elem_index = lambda { |d| target_dims[d] } + src_index = lambda { |d| target_dims[d] } + end + + coord_src = coords + coord_dst = coords + if shuffle + coord_dst = coords_out + end + + p = Procedure( name, args, local: true, inline: true ) { + pr If( d == 1 => lambda { + pr For( i, 0, elem_number[elem_index[0]], operator: '<', declit: true ) { + pr coord_dst[dst_index[0]] === i + pr coord_src[src_index[0]] === i + pr memcpy( FuncCall(:aml_layout_aderef, dst, coord_dst), FuncCall(:aml_layout_aderef, src, coord_src), elem_size ) + } + }, else: lambda { + pr For( i, 0, elem_number[elem_index[d - 1]], operator: '<', declit: true ) { + args[0] = d - 1 + pr coord_dst[dst_index[d - 1]] === i + pr coord_src[src_index[d - 1]] === i + pr p.call(*args) + } + }) + } +end + def aml_copy_layout(native: true, shuffle: false) dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in @@ -382,31 +433,56 @@ def aml_copy_layout(native: true, shuffle: false) name << (native ? 
"native" : "generic") args = [dst, src] - args += [target_dims] if shuffle + args << target_dims if shuffle p = Procedure( name, args, return_type: Int ) { - decl ddst, dsrc decl d, elem_size - pr ddst === "#{dst}->data" - pr dsrc === "#{src}->data" - pr d === "#{dsrc}->ndims" - pr assert(d > 0); + if native + decl ddst, dsrc - pr elem_size === "#{dsrc}->cpitch[0]" - pr assert(d == "#{ddst}->ndims") - pr assert(elem_size == "#{ddst}->cpitch[0]") - pr For(i, 0, d, operator: '<', declit: true) { - pr assert( "#{dsrc}->dims[#{src_index[i]}] == #{ddst}->dims[#{dst_index[i]}]" ) - } + pr ddst === "#{dst}->data" + pr dsrc === "#{src}->data" + pr d === "#{dsrc}->ndims" + pr assert(d > 0); - args = [] - args += [ d ] - args += [ target_dims ] if shuffle - args += [ "#{ddst}->ptr", "#{ddst}->cpitch", "#{ddst}->stride", - "#{dsrc}->ptr", "#{dsrc}->cpitch", "#{dsrc}->stride", - "#{dsrc}->dims", elem_size ] - pr Return(aml_copy_nd_c(stride: true, shuffle: shuffle).call(*args)) + pr elem_size === "#{dsrc}->cpitch[0]" + pr assert(d == "#{ddst}->ndims") + pr assert(elem_size == "#{ddst}->cpitch[0]") + pr For(i, 0, d, operator: '<', declit: true) { + pr assert( "#{dsrc}->dims[#{src_index[i]}] == #{ddst}->dims[#{dst_index[i]}]" ) + } + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ "#{ddst}->ptr", "#{ddst}->cpitch", "#{ddst}->stride", + "#{dsrc}->ptr", "#{dsrc}->cpitch", "#{dsrc}->stride", + "#{dsrc}->dims", elem_size ] + pr Return(aml_copy_nd_c(stride: true, shuffle: shuffle).call(*args)) + else + coords = Sizet :coords, dim: Dim() + coords_out = Sizet :coords_out, dim: Dim() + elem_number = Sizet :elem_number, dim: Dim() + decl coords + decl coords_out if shuffle + decl elem_number + + pr assert( FuncCall( :aml_layout_ndims, dst ) == FuncCall( :aml_layout_ndims, src ) ) + pr d === FuncCall( :aml_layout_ndims, dst ) + pr assert( FuncCall( :aml_layout_element_size, dst ) == FuncCall( :aml_layout_element_size, src ) ) + pr elem_size === FuncCall( 
:aml_layout_element_size, dst ) + pr coords === alloca(d * sizeof("size_t")).cast(coords) + pr coords_out === alloca(d * sizeof("size_t")).cast(coords_out) if shuffle + pr elem_number === alloca(d * sizeof("size_t")).cast(elem_number) + pr FuncCall( :aml_layout_adims, src, elem_number ) + + new_args = [d, dst, src, elem_number, elem_size, coords] + new_args << coords_out << target_dims if shuffle + + pr aml_copy_layout_generic_helper(shuffle: shuffle).call(*new_args) + pr Return(0) + end } end @@ -439,7 +515,7 @@ def aml_copy_layout_transpose(native: true, reverse: false) pr target_dims[i] === i + 1 } end - pr Return( aml_copy_layout(native: true, shuffle: true).call( dst, src, target_dims) ) + pr Return( aml_copy_layout(native: native, shuffle: true).call( dst, src, target_dims) ) } end @@ -475,6 +551,13 @@ def aml_copy_layout_transpose(native: true, reverse: false) pr aml_copy_layout_transpose pr aml_copy_layout_transpose(reverse: true) +pr aml_copy_layout_generic_helper(shuffle: false) +pr aml_copy_layout_generic_helper(shuffle: true) +pr aml_copy_layout(native: false) +pr aml_copy_layout(native: false, shuffle: true) +pr aml_copy_layout_transpose(native: false) +pr aml_copy_layout_transpose(native: false, reverse: true) + stdout0.close Process.wait(pid1) diff --git a/tests/copy.c b/tests/copy.c index c85b6e39..c8499fa8 100644 --- a/tests/copy.c +++ b/tests/copy.c @@ -903,6 +903,65 @@ void test_copy_layout(void) } +void test_copy_layout_generic(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 1, 1, 1}; + size_t c_dst_pitch[3] = { 5, 3, 2 }; + size_t dst_stride[3] = { 1, 1, 1}; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + AML_LAYOUT_DECL(src_layout, 3); + AML_LAYOUT_DECL(dst_layout, 3); + AML_LAYOUT_DECL(dst2_layout, 3); + + aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, 
sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_layout_generic(&dst_layout, &src_layout); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_layout_generic(&dst2_layout, &dst_layout); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + void test_transpose_layout(void) { size_t elem_number[4] = { 5, 3, 2, 4 }; @@ -972,6 +1031,75 @@ void test_transpose_layout(void) } +void test_transpose_layout_generic(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[4] = { 3, 2, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + AML_LAYOUT_DECL(src_layout, 4); + AML_LAYOUT_DECL(dst_layout, 4); + AML_LAYOUT_DECL(dst2_layout, 4); + + aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, 
+ (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_layout_transpose_generic(&dst_layout, &src_layout); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_layout_reverse_transpose_generic(&dst2_layout, &dst_layout); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + int main(int argc, char *argv[]) { test_copy_2d(); @@ -991,6 +1119,8 @@ int main(int argc, char *argv[]) test_copy_sh4dstr(); test_copy_sh4dstr_c(); test_copy_layout(); + test_copy_layout_generic(); test_transpose_layout(); + test_transpose_layout_generic(); return 0; } From 0d35405f2b49a904095624e124747a9a7eb74cbe Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 11 Jan 2019 10:05:08 -0600 Subject: [PATCH 20/47] Refactoring. 
--- src/Makefile.am | 2 +- src/aml-layout-dense.h | 95 +++++++++++++++++++ src/aml-layout.h | 84 ----------------- src/aml.h | 1 + src/copy.c | 32 ++++--- src/copy.rb | 14 ++- src/layout.c | 132 --------------------------- src/layout_dense.c | 202 +++++++++++++++++++++++++++++++++++++---- tests/copy.c | 100 ++++++++++---------- tests/layout.c | 77 +++++++++------- 10 files changed, 406 insertions(+), 333 deletions(-) create mode 100644 src/aml-layout-dense.h diff --git a/src/Makefile.am b/src/Makefile.am index f498a625..1cb59c2a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -43,7 +43,7 @@ LIBCSOURCES = aml.c area.c arena.c \ $(LAYOUT_CSOURCES) \ copy.c -LIBHSOURCES = aml.h aml-layout.h aml-copy.h +LIBHSOURCES = aml.h aml-layout.h aml-layout-dense.h aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml diff --git a/src/aml-layout-dense.h b/src/aml-layout-dense.h new file mode 100644 index 00000000..8ff68402 --- /dev/null +++ b/src/aml-layout-dense.h @@ -0,0 +1,95 @@ +#ifndef AML_LAYOUT_DENSE_H +#define AML_LAYOUT_DENSE_H 1 + +#include + +/******************************************************************************* + * Native Layout Operators. + ******************************************************************************/ + +/* Layout: describes how a multi-dimensional dense data structure is collapsed + * into a linear (and contiguous) virtual address range. + * "ptr": base pointer of the address range + * "ndims": number of dimensions + * "dims": dimensions, in element size, of the data structure, by order of + * appearance in memory. + * "stride": offset between elements of the same dimension. + * "pitch": distances between two elements of the next dimension (or total + dimension of the layout in this dimension). + * "cpitch": cumulative distances between two elements in the same dimension + * (cpitch[0] is the element size in bytes). 
+ */ +struct aml_layout_data_native { + void *ptr; + size_t ndims; + size_t *dims; + size_t *stride; + size_t *pitch; + size_t *cpitch; +}; + +#define AML_LAYOUT_NATIVE_ALLOCSIZE(ndims) (sizeof(struct aml_layout) +\ + sizeof(struct aml_layout_data_native) +\ + ndims * 4 * sizeof(size_t)) + +#define AML_LAYOUT_NATIVE_DECL(name, ndims) \ + size_t __ ##name## _inner_data[ndims * 4]; \ + struct aml_layout_data_native __ ##name## _inner_struct = { \ + NULL, \ + ndims, \ + __ ##name## _inner_data, \ + __ ##name## _inner_data + ndims, \ + __ ##name## _inner_data + 2 * ndims, \ + __ ##name## _inner_data + 3 * ndims, \ + }; \ + struct aml_layout name = { \ + 0, \ + NULL, \ + (struct aml_layout_data *)& __ ##name## _inner_struct, \ + }; + +int aml_layout_native_struct_init(struct aml_layout *l, size_t ndims, + void *data); +int aml_layout_native_ainit(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch); +int aml_layout_native_vinit(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + va_list data); +int aml_layout_native_init(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...); +int aml_layout_native_acreate(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch); +int aml_layout_native_vcreate(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + va_list data); +int aml_layout_native_create(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...); + +void *aml_layout_column_deref(const struct aml_layout_data *data, + va_list coords); +void *aml_layout_column_aderef(const struct aml_layout_data *data, + const size_t *coords); +int aml_layout_column_order(const struct aml_layout_data *data); +int 
aml_layout_column_dims(const struct aml_layout_data *data, va_list dims); +int aml_layout_column_adims(const struct aml_layout_data *data, size_t *dims); +size_t aml_layout_column_ndims(const struct aml_layout_data *data); +size_t aml_layout_column_elem_size(const struct aml_layout_data *data); + +extern struct aml_layout_ops aml_layout_column_ops; + +void *aml_layout_row_deref(const struct aml_layout_data *data, va_list coords); +void *aml_layout_row_aderef(const struct aml_layout_data *data, + const size_t *coords); +int aml_layout_row_order(const struct aml_layout_data *data); +int aml_layout_row_dims(const struct aml_layout_data *data, va_list dims); +int aml_layout_row_adims(const struct aml_layout_data *data, size_t *dims); +size_t aml_layout_row_ndims(const struct aml_layout_data *data); +size_t aml_layout_row_element_size(const struct aml_layout_data *data); + +extern struct aml_layout_ops aml_layout_row_ops; + +#endif diff --git a/src/aml-layout.h b/src/aml-layout.h index 3b67047b..e0ebd805 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -26,27 +26,6 @@ struct aml_layout_data; #define AML_TYPE_SET(tags, bit) (tags | bit) -/* Layout: describes how a multi-dimensional data structure is collapsed into a - * linear (and contiguous) virtual address range. - * "ptr": base pointer of the address range - * "ndims": number of dimensions - * "dims": dimensions, in element size, of the data structure, by order of - * appearance in memory. - * "stride": offset between elements of the same dimension. - * "pitch": distances between two elements of the next dimension (or total - dimension of the layout in this dimension). - * "cpitch": cumulative distances between two elements in the same dimension - * (pitch[0] is the element size in bytes). 
- */ -struct aml_layout_data { - void *ptr; - size_t ndims; - size_t *dims; - size_t *stride; - size_t *pitch; - size_t *cpitch; -}; - struct aml_layout_ops { void *(*deref)(const struct aml_layout_data *, va_list coords); void *(*aderef)(const struct aml_layout_data *, const size_t *coords); @@ -63,44 +42,6 @@ struct aml_layout { struct aml_layout_data *data; }; -#define AML_LAYOUT_ALLOCSIZE(ndims) (sizeof(struct aml_layout) +\ - sizeof(struct aml_layout_data) +\ - ndims * 4 * sizeof(size_t)) - -#define AML_LAYOUT_DECL(name, ndims) \ - size_t __ ##name## _inner_data[ndims * 4]; \ - struct aml_layout_data __ ##name## _inner_struct = { \ - NULL, \ - ndims, \ - __ ##name## _inner_data, \ - __ ##name## _inner_data + ndims, \ - __ ##name## _inner_data + 2 * ndims, \ - __ ##name## _inner_data + 3 * ndims, \ - }; \ - struct aml_layout name = { \ - 0, \ - NULL, \ - & __ ##name## _inner_struct, \ - }; - -int aml_layout_struct_init(struct aml_layout *l, size_t ndims, void *data); -int aml_layout_ainit(struct aml_layout *l, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, - const size_t *dims, const size_t *stride, - const size_t *pitch); -int aml_layout_vinit(struct aml_layout *l, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, va_list data); -int aml_layout_init(struct aml_layout *l, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, ...); -int aml_layout_acreate(struct aml_layout **l, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, - const size_t *dims, const size_t *stride, - const size_t *pitch); -int aml_layout_vcreate(struct aml_layout **l, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, va_list data); -int aml_layout_create(struct aml_layout **l, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, ...); - void *aml_layout_deref(const struct aml_layout *l, ...); void *aml_layout_aderef(const struct aml_layout *l, const size_t *coords); int 
aml_layout_order(const struct aml_layout *l); @@ -109,29 +50,4 @@ int aml_layout_adims(const struct aml_layout *l, size_t *dims); size_t aml_layout_ndims(const struct aml_layout *l); size_t aml_layout_element_size(const struct aml_layout *l); -/******************************************************************************* - * Dense Layout Operators. - ******************************************************************************/ - -void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords); -void *aml_layout_column_aderef(const struct aml_layout_data *d, - const size_t *coords); -int aml_layout_column_order(const struct aml_layout_data *d); -int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims); -int aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims); -size_t aml_layout_column_ndims(const struct aml_layout_data *d); -size_t aml_layout_column_elem_size(const struct aml_layout_data *d); - -extern struct aml_layout_ops aml_layout_column_ops; - -void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords); -void *aml_layout_row_aderef(const struct aml_layout_data *d, - const size_t *coords); -int aml_layout_row_order(const struct aml_layout_data *d); -int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims); -int aml_layout_row_adims(const struct aml_layout_data *d, size_t *dims); -size_t aml_layout_row_ndims(const struct aml_layout_data *d); -size_t aml_layout_row_element_size(const struct aml_layout_data *d); - -extern struct aml_layout_ops aml_layout_row_ops; #endif diff --git a/src/aml.h b/src/aml.h index 5a1718bd..d8cd5407 100644 --- a/src/aml.h +++ b/src/aml.h @@ -20,6 +20,7 @@ #endif #include "aml-layout.h" +#include "aml-layout-dense.h" #include "aml-copy.h" /******************************************************************************* * Forward Declarations: diff --git a/src/copy.c b/src/copy.c index ee2eead1..4fcd4d0d 100644 --- a/src/copy.c +++ b/src/copy.c @@ 
-456,10 +456,10 @@ int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) { size_t d; size_t elem_size; - struct aml_layout_data *ddst; - struct aml_layout_data *dsrc; - ddst = dst->data; - dsrc = src->data; + struct aml_layout_data_native *ddst; + struct aml_layout_data_native *dsrc; + ddst = (struct aml_layout_data_native *)dst->data; + dsrc = (struct aml_layout_data_native *)src->data; d = dsrc->ndims; assert(d > 0); elem_size = dsrc->cpitch[0]; @@ -478,10 +478,10 @@ int aml_copy_layout_transform_native(struct aml_layout *dst, { size_t d; size_t elem_size; - struct aml_layout_data *ddst; - struct aml_layout_data *dsrc; - ddst = dst->data; - dsrc = src->data; + struct aml_layout_data_native *ddst; + struct aml_layout_data_native *dsrc; + ddst = (struct aml_layout_data_native *)dst->data; + dsrc = (struct aml_layout_data_native *)src->data; d = dsrc->ndims; assert(d > 0); elem_size = dsrc->cpitch[0]; @@ -499,7 +499,9 @@ int aml_copy_layout_transpose_native(struct aml_layout *dst, { size_t d; size_t *target_dims; - d = src->data->ndims; + struct aml_layout_data_native *dsrc; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; for (size_t i = 0; i < d - 1; i += 1) @@ -512,7 +514,9 @@ int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, { size_t d; size_t *target_dims; - d = src->data->ndims; + struct aml_layout_data_native *dsrc; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; for (size_t i = 1; i < d; i += 1) @@ -623,7 +627,9 @@ int aml_copy_layout_transpose_generic(struct aml_layout *dst, { size_t d; size_t *target_dims; - d = src->data->ndims; + struct aml_layout_data_native *dsrc; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[d - 1] = 0; 
for (size_t i = 0; i < d - 1; i += 1) @@ -636,7 +642,9 @@ int aml_copy_layout_reverse_transpose_generic(struct aml_layout *dst, { size_t d; size_t *target_dims; - d = src->data->ndims; + struct aml_layout_data_native *dsrc; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; target_dims = (size_t *) alloca(d * sizeof(size_t)); target_dims[0] = d - 1; for (size_t i = 1; i < d; i += 1) diff --git a/src/copy.rb b/src/copy.rb index f2c8fc2f..e38e1b8c 100644 --- a/src/copy.rb +++ b/src/copy.rb @@ -21,6 +21,7 @@ #include #include #include +#include #include #include EOF @@ -416,8 +417,8 @@ def aml_copy_layout(native: true, shuffle: false) src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in target_dims = Sizet :target_dims, dim: Dim(), dir: :in - ddst = Pointer :ddst, type: CStruct::new(type_name: :aml_layout_data, members: {}) - dsrc = Pointer :dsrc, type: CStruct::new(type_name: :aml_layout_data, members: {}) + ddst = Pointer :ddst, type: CStruct::new(type_name: :aml_layout_data_native, members: {}) + dsrc = Pointer :dsrc, type: CStruct::new(type_name: :aml_layout_data_native, members: {}) d = Sizet :d elem_size = Sizet :elem_size i = Sizet :i @@ -441,8 +442,8 @@ def aml_copy_layout(native: true, shuffle: false) if native decl ddst, dsrc - pr ddst === "#{dst}->data" - pr dsrc === "#{src}->data" + pr ddst === "(struct aml_layout_data_native *)#{dst}->data" + pr dsrc === "(struct aml_layout_data_native *)#{src}->data" pr d === "#{dsrc}->ndims" pr assert(d > 0); @@ -490,6 +491,7 @@ def aml_copy_layout_transpose(native: true, reverse: false) dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in + dsrc = Pointer :dsrc, type: CStruct::new(type_name: :aml_layout_data_native, members: {}) target_dims = Sizet :target_dims, dim: Dim() d = Sizet :d i = Sizet :i @@ -501,8 +503,10 @@ def 
aml_copy_layout_transpose(native: true, reverse: false) p = Procedure( name, [ dst, src ], return_type: Int ) { decl d decl target_dims + decl dsrc - pr d === "#{src}->data->ndims" + pr dsrc === "(struct aml_layout_data_native *)#{src}->data" + pr d === "#{dsrc}->ndims" pr target_dims === alloca(d * sizeof("size_t")).cast(target_dims) if reverse pr target_dims[0] === d - 1 diff --git a/src/layout.c b/src/layout.c index a8b8a70c..7c1aa163 100644 --- a/src/layout.c +++ b/src/layout.c @@ -63,135 +63,3 @@ size_t aml_layout_element_size(const struct aml_layout *layout) return layout->ops->element_size(layout->data); } -/******************************************************************************* - * Layout initialization: - ******************************************************************************/ - -int aml_layout_struct_init(struct aml_layout *layout, - size_t ndims, void *memory) -{ - assert(layout == (struct aml_layout *)memory); - memory = (void *)((uintptr_t)memory + - sizeof(struct aml_layout)); - layout->data = memory; - memory = (void *)((uintptr_t)memory + - sizeof(struct aml_layout_data)); - layout->data->ndims = ndims; - layout->data->dims = (size_t *)memory; - layout->data->stride = layout->data->dims + ndims; - layout->data->pitch = layout->data->stride + ndims; - layout->data->cpitch = layout->data->pitch + ndims; - return 0; -} - -int aml_layout_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, - const size_t *dims, const size_t *stride, - const size_t *pitch) -{ - assert(layout != NULL); - assert(layout->data != NULL); - struct aml_layout_data *data = layout->data; - assert(data->ndims == ndims); - assert(data->dims); - assert(data->stride); - assert(data->pitch); - assert(data->cpitch); - data->ptr = ptr; - int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); - if(type == AML_TYPE_LAYOUT_ROW_ORDER) - { - AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ROW_ORDER); - layout->ops = 
&aml_layout_row_ops; - for(size_t i = 0; i < ndims; i++) - { - data->dims[i] = dims[ndims-i-1]; - data->stride[i] = stride[ndims-i-1]; - data->pitch[i] = pitch[ndims-i-1]; - } - data->cpitch[0] = element_size; - for(size_t i = 1; i < ndims; i++) - data->cpitch[i] = data->cpitch[i-1]*pitch[ndims-i]; - } - else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) - { - AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); - layout->ops = &aml_layout_column_ops; - memcpy(data->dims, dims, ndims * sizeof(size_t)); - memcpy(data->stride, stride, ndims * sizeof(size_t)); - memcpy(data->pitch, pitch, ndims * sizeof(size_t)); - /* pitches are only necessary for ndims-1 dimensions. Since we - * store element size as p->cpitch[0], there's still ndims - * elements in the array. - */ - data->cpitch[0] = element_size; - for(size_t i = 1; i < ndims; i++) - data->cpitch[i] = data->cpitch[i-1]*pitch[i-1]; - } - return 0; -} - -int aml_layout_vinit(struct aml_layout *p, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, va_list ap) -{ - size_t dims[ndims]; - size_t stride[ndims]; - size_t pitch[ndims-1]; - for(size_t i = 0; i < ndims; i++) - dims[i] = va_arg(ap, size_t); - for(size_t i = 0; i < ndims; i++) - stride[i] = va_arg(ap, size_t); - for(size_t i = 0; i < ndims; i++) - pitch[i] = va_arg(ap, size_t); - return aml_layout_ainit(p, tags, ptr, element_size, ndims, dims, stride, - pitch); -} - -int aml_layout_init(struct aml_layout *p, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, ...) 
-{ - int err; - va_list ap; - va_start(ap, ndims); - err = aml_layout_vinit(p, tags, ptr, element_size, ndims, ap); - va_end(ap); - return err; -} - -int aml_layout_acreate(struct aml_layout **layout, uint64_t tags, void *ptr, - const size_t element_size, - size_t ndims, const size_t *dims, const size_t *stride, - const size_t *pitch) -{ - assert(ndims > 0); - void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims)); - *layout = (struct aml_layout *)baseptr; - aml_layout_struct_init(*layout, ndims, baseptr); - aml_layout_ainit(*layout, tags, ptr, element_size, ndims, dims, stride, pitch); - return 0; -} - -int aml_layout_vcreate(struct aml_layout **layout, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, va_list ap) -{ - assert(ndims > 0); - void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims)); - *layout = (struct aml_layout *)baseptr; - aml_layout_struct_init(*layout, ndims, baseptr); - return aml_layout_vinit(*layout, tags, ptr, element_size, ndims, ap); -} - -int aml_layout_create(struct aml_layout **layout, uint64_t tags, void *ptr, - const size_t element_size, size_t ndims, ...) 
-{ - int err; - va_list ap; - assert(ndims > 0); - void *baseptr = calloc(1, AML_LAYOUT_ALLOCSIZE(ndims)); - *layout = (struct aml_layout *)baseptr; - aml_layout_struct_init(*layout, ndims, baseptr); - va_start(ap, ndims); - err = aml_layout_vinit(*layout, tags, ptr, element_size, ndims, ap); - va_end(ap); - return err; -} diff --git a/src/layout_dense.c b/src/layout_dense.c index 48128ed8..29a7c9c2 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -1,11 +1,155 @@ #include +/******************************************************************************* + * Native layout initialization: + ******************************************************************************/ + +int aml_layout_native_struct_init(struct aml_layout *layout, size_t ndims, + void *memory) +{ + struct aml_layout_data_native *dataptr; + + assert(layout == (struct aml_layout *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout)); + dataptr = memory; + layout->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout_data_native)); + dataptr->ndims = ndims; + dataptr->dims = (size_t *)memory; + dataptr->stride = dataptr->dims + ndims; + dataptr->pitch = dataptr->stride + ndims; + dataptr->cpitch = dataptr->pitch + ndims; + return 0; +} + +int aml_layout_native_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch) +{ + assert(layout != NULL); + assert(layout->data != NULL); + struct aml_layout_data_native *data = + (struct aml_layout_data_native *)layout->data; + assert(data->ndims == ndims); + assert(data->dims); + assert(data->stride); + assert(data->pitch); + assert(data->cpitch); + data->ptr = ptr; + int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); + if(type == AML_TYPE_LAYOUT_ROW_ORDER) + { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ROW_ORDER); + layout->ops = &aml_layout_row_ops; + for(size_t i = 0; i < ndims; i++) 
+ { + data->dims[i] = dims[ndims-i-1]; + data->stride[i] = stride[ndims-i-1]; + data->pitch[i] = pitch[ndims-i-1]; + } + data->cpitch[0] = element_size; + for(size_t i = 1; i < ndims; i++) + data->cpitch[i] = data->cpitch[i-1]*pitch[ndims-i]; + } + else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) + { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); + layout->ops = &aml_layout_column_ops; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + memcpy(data->stride, stride, ndims * sizeof(size_t)); + memcpy(data->pitch, pitch, ndims * sizeof(size_t)); + /* pitches are only necessary for ndims-1 dimensions. Since we + * store element size as p->cpitch[0], there's still ndims + * elements in the array. + */ + data->cpitch[0] = element_size; + for(size_t i = 1; i < ndims; i++) + data->cpitch[i] = data->cpitch[i-1]*pitch[i-1]; + } + return 0; +} + +int aml_layout_native_vinit(struct aml_layout *p, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, va_list ap) +{ + size_t dims[ndims]; + size_t stride[ndims]; + size_t pitch[ndims-1]; + for(size_t i = 0; i < ndims; i++) + dims[i] = va_arg(ap, size_t); + for(size_t i = 0; i < ndims; i++) + stride[i] = va_arg(ap, size_t); + for(size_t i = 0; i < ndims; i++) + pitch[i] = va_arg(ap, size_t); + return aml_layout_native_ainit(p, tags, ptr, element_size, ndims, dims, + stride, pitch); +} + +int aml_layout_native_init(struct aml_layout *p, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...) 
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_layout_native_vinit(p, tags, ptr, element_size, ndims, ap); + va_end(ap); + return err; +} + +int aml_layout_native_acreate(struct aml_layout **layout, uint64_t tags, + void *ptr, const size_t element_size, + size_t ndims, const size_t *dims, + const size_t *stride, const size_t *pitch) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(*layout, ndims, baseptr); + return aml_layout_native_ainit(*layout, tags, ptr, element_size, ndims, + dims, stride, pitch); +} + +int aml_layout_native_vcreate(struct aml_layout **layout, uint64_t tags, + void *ptr, const size_t element_size, + size_t ndims, va_list ap) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(*layout, ndims, baseptr); + return aml_layout_native_vinit(*layout, tags, ptr, element_size, ndims, + ap); +} + +int aml_layout_native_create(struct aml_layout **layout, uint64_t tags, + void *ptr, const size_t element_size, size_t ndims, + ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(*layout, ndims, baseptr); + va_start(ap, ndims); + err = aml_layout_native_vinit(*layout, tags, ptr, element_size, ndims, + ap); + va_end(ap); + return err; +} + /******************************************************************************* * COLUMN OPERATORS: ******************************************************************************/ -void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords) +void *aml_layout_column_deref(const struct aml_layout_data *data, + va_list coords) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; void *ptr; assert(d != NULL); assert(d->ptr != NULL); @@ -19,8 +163,11 @@ void *aml_layout_column_deref(const struct aml_layout_data *d, va_list coords) return ptr; } -void *aml_layout_column_aderef(const struct aml_layout_data *d, const size_t *coords) +void *aml_layout_column_aderef(const struct aml_layout_data *data, + const size_t *coords) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; void *ptr; assert(d != NULL); assert(d->ptr != NULL); @@ -33,13 +180,15 @@ void *aml_layout_column_aderef(const struct aml_layout_data *d, const size_t *co return ptr; } -int aml_layout_column_order(const struct aml_layout_data *d) +int aml_layout_column_order(const struct aml_layout_data *data) { return AML_TYPE_LAYOUT_COLUMN_ORDER; } -int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims) +int aml_layout_column_dims(const struct aml_layout_data *data, va_list dims) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; assert(d != NULL); for(size_t i = 0; i < d->ndims; i++) { @@ -50,21 +199,27 @@ int aml_layout_column_dims(const struct aml_layout_data *d, va_list dims) return 0; } -int 
aml_layout_column_adims(const struct aml_layout_data *d, size_t *dims) +int aml_layout_column_adims(const struct aml_layout_data *data, size_t *dims) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; assert(d != NULL); assert(dims != NULL); memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); return 0; } -size_t aml_layout_column_ndims(const struct aml_layout_data *d) +size_t aml_layout_column_ndims(const struct aml_layout_data *data) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; return d->ndims; } -size_t aml_layout_column_element_size(const struct aml_layout_data *d) +size_t aml_layout_column_element_size(const struct aml_layout_data *data) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; return d->cpitch[0]; } @@ -83,8 +238,10 @@ struct aml_layout_ops aml_layout_column_ops = { * ROW OPERATORS: ******************************************************************************/ -void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords) +void *aml_layout_row_deref(const struct aml_layout_data *data, va_list coords) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; void *ptr; assert(d != NULL); assert(d->ptr != NULL); @@ -93,13 +250,17 @@ void *aml_layout_row_deref(const struct aml_layout_data *d, va_list coords) { size_t c = va_arg(coords, size_t); assert(c < d->dims[d->ndims - i - 1]); - ptr += c*d->cpitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; + ptr += c * d->cpitch[d->ndims - i - 1] * + d->stride[d->ndims - i - 1]; } return ptr; } -void *aml_layout_row_aderef(const struct aml_layout_data *d, const size_t *coords) +void *aml_layout_row_aderef(const struct aml_layout_data *data, + const size_t *coords) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; void *ptr; assert(d != NULL); assert(d->ptr != NULL); @@ -108,18 +269,21 
@@ void *aml_layout_row_aderef(const struct aml_layout_data *d, const size_t *coord { size_t c = coords[i]; assert(c < d->dims[d->ndims - i - 1]); - ptr += c*d->cpitch[d->ndims - i - 1]*d->stride[d->ndims - i - 1]; + ptr += c * d->cpitch[d->ndims - i - 1] * + d->stride[d->ndims - i - 1]; } return ptr; } -int aml_layout_row_order(const struct aml_layout_data *d) +int aml_layout_row_order(const struct aml_layout_data *data) { return AML_TYPE_LAYOUT_ROW_ORDER; } -int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims) +int aml_layout_row_dims(const struct aml_layout_data *data, va_list dims) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; assert(d != NULL); for(size_t i = 0; i < d->ndims; i++) { @@ -130,8 +294,10 @@ int aml_layout_row_dims(const struct aml_layout_data *d, va_list dims) return 0; } -int aml_layout_row_adims(const struct aml_layout_data *d, size_t *dims) +int aml_layout_row_adims(const struct aml_layout_data *data, size_t *dims) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; assert(d != NULL); for(size_t i = 0; i < d->ndims; i++) { @@ -140,13 +306,17 @@ int aml_layout_row_adims(const struct aml_layout_data *d, size_t *dims) return 0; } -size_t aml_layout_row_ndims(const struct aml_layout_data *d) +size_t aml_layout_row_ndims(const struct aml_layout_data *data) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; return d->ndims; } -size_t aml_layout_row_element_size(const struct aml_layout_data *d) +size_t aml_layout_row_element_size(const struct aml_layout_data *data) { + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; return d->cpitch[0]; } diff --git a/tests/copy.c b/tests/copy.c index c8499fa8..f9bc6a66 100644 --- a/tests/copy.c +++ b/tests/copy.c @@ -859,19 +859,19 @@ void test_copy_layout(void) double ref_dst2[4][6][10]; double ref_dst[2][3][5]; - 
AML_LAYOUT_DECL(src_layout, 3); - AML_LAYOUT_DECL(dst_layout, 3); - AML_LAYOUT_DECL(dst2_layout, 3); - - aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)src, sizeof(double), 3, elem_number, - src_stride, c_src_pitch); - aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst, sizeof(double), 3, elem_number, - dst_stride, c_dst_pitch); - aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst2, sizeof(double), 3, elem_number, - src_stride, c_src_pitch); + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 3); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); for (int k = 0; k < 4; k++) for (int j = 0; j < 6; j++) @@ -918,19 +918,19 @@ void test_copy_layout_generic(void) double ref_dst2[4][6][10]; double ref_dst[2][3][5]; - AML_LAYOUT_DECL(src_layout, 3); - AML_LAYOUT_DECL(dst_layout, 3); - AML_LAYOUT_DECL(dst2_layout, 3); + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 3); - aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)src, sizeof(double), 3, elem_number, - src_stride, c_src_pitch); - aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst, sizeof(double), 3, elem_number, - dst_stride, c_dst_pitch); - aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst2, sizeof(double), 3, elem_number, - src_stride, c_src_pitch); + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + 
src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); for (int k = 0; k < 4; k++) for (int j = 0; j < 6; j++) @@ -978,19 +978,19 @@ void test_transpose_layout(void) double ref_dst[5][4][2][3]; double ref_dst2[8][4][6][10]; - AML_LAYOUT_DECL(src_layout, 4); - AML_LAYOUT_DECL(dst_layout, 4); - AML_LAYOUT_DECL(dst2_layout, 4); + AML_LAYOUT_NATIVE_DECL(src_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 4); - aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)src, sizeof(double), 4, elem_number, - src_stride, c_src_pitch); - aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst, sizeof(double), 4, elem_number2, - dst_stride, c_dst_pitch); - aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst2, sizeof(double), 4, elem_number, - src_stride, c_src_pitch); + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); for (int l = 0; l < 8; l++) for (int k = 0; k < 4; k++) @@ -1047,19 +1047,19 @@ void test_transpose_layout_generic(void) double ref_dst[5][4][2][3]; double ref_dst2[8][4][6][10]; - AML_LAYOUT_DECL(src_layout, 4); - AML_LAYOUT_DECL(dst_layout, 4); - AML_LAYOUT_DECL(dst2_layout, 4); - - aml_layout_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)src, sizeof(double), 4, elem_number, - src_stride, c_src_pitch); - 
aml_layout_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst, sizeof(double), 4, elem_number2, - dst_stride, c_dst_pitch); - aml_layout_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, - (void *)dst2, sizeof(double), 4, elem_number, - src_stride, c_src_pitch); + AML_LAYOUT_NATIVE_DECL(src_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 4); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); for (int l = 0; l < 8; l++) for (int k = 0; k < 4; k++) diff --git a/tests/layout.c b/tests/layout.c index fd815240..826feb0d 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -4,7 +4,7 @@ int main(int argc, char *argv[]) { struct aml_layout *a; - AML_LAYOUT_DECL(b, 5); + AML_LAYOUT_NATIVE_DECL(b, 5); /* padd the dims to the closest multiple of 2 */ float memory[16][12][8][8][4]; @@ -29,27 +29,34 @@ int main(int argc, char *argv[]) aml_init(&argc, &argv); /* initialize column order layouts */ - aml_layout_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)memory, - sizeof(float), 5, dims_col, stride_col, pitch_col); - aml_layout_ainit(&b, AML_TYPE_LAYOUT_COLUMN_ORDER, (void *)memory, - sizeof(float), 5, dims_col, stride_col, pitch_col); - - assert( (intptr_t)(a->data->stride) - (intptr_t)(a->data->dims) + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(float), 5, dims_col, + stride_col, pitch_col); + aml_layout_native_ainit(&b, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(float), 5, dims_col, + stride_col, pitch_col); + + struct aml_layout_data_native *adataptr; + struct aml_layout_data_native 
*bdataptr; + + adataptr = (struct aml_layout_data_native *)a->data; + bdataptr = (struct aml_layout_data_native *)b.data; + assert( (intptr_t)(adataptr->stride) - (intptr_t)(adataptr->dims) == 5*sizeof(size_t) ); - assert( (intptr_t)(a->data->pitch) - (intptr_t)(a->data->dims) + assert( (intptr_t)(adataptr->pitch) - (intptr_t)(adataptr->dims) == 10*sizeof(size_t) ); - assert( (intptr_t)(a->data->cpitch) - (intptr_t)(a->data->dims) + assert( (intptr_t)(adataptr->cpitch) - (intptr_t)(adataptr->dims) == 15*sizeof(size_t) ); /* some simple checks */ - assert(!memcmp(a->data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(a->data->stride, stride, sizeof(size_t)*5)); - assert(!memcmp(a->data->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(a->data->cpitch, cpitch, sizeof(size_t)*5)); - assert(!memcmp(b.data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(b.data->stride, stride, sizeof(size_t)*5)); - assert(!memcmp(b.data->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(b.data->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(adataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(adataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*5)); /* test column major subroutines */ size_t dims_res[5]; @@ -78,27 +85,31 @@ int main(int argc, char *argv[]) free(a); /* initialize row order layouts */ - aml_layout_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, - sizeof(float), 5, dims_row, stride_row, pitch_row); - aml_layout_ainit(&b, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, - sizeof(float), 5, dims_row, stride_row, pitch_row); - - assert( (intptr_t)(a->data->stride) - (intptr_t)(a->data->dims) + 
aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + sizeof(float), 5, dims_row, stride_row, + pitch_row); + aml_layout_native_ainit(&b, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + sizeof(float), 5, dims_row, stride_row, + pitch_row); + + adataptr = (struct aml_layout_data_native *)a->data; + bdataptr = (struct aml_layout_data_native *)b.data; + assert( (intptr_t)(adataptr->stride) - (intptr_t)(adataptr->dims) == 5*sizeof(size_t) ); - assert( (intptr_t)(a->data->pitch) - (intptr_t)(a->data->dims) + assert( (intptr_t)(adataptr->pitch) - (intptr_t)(adataptr->dims) == 10*sizeof(size_t) ); - assert( (intptr_t)(a->data->cpitch) - (intptr_t)(a->data->dims) + assert( (intptr_t)(adataptr->cpitch) - (intptr_t)(adataptr->dims) == 15*sizeof(size_t) ); /* some simple checks */ - assert(!memcmp(a->data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(a->data->stride, stride, sizeof(size_t)*5)); - assert(!memcmp(a->data->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(a->data->cpitch, cpitch, sizeof(size_t)*5)); - assert(!memcmp(b.data->dims, dims, sizeof(size_t)*5)); - assert(!memcmp(b.data->stride, stride, sizeof(size_t)*5)); - assert(!memcmp(b.data->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(b.data->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(adataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(adataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*5)); /* test column major subroutines */ size_t coords_test_row[5] = { 5, 4, 3, 2, 1 }; From afd21ac2526ca118cff15a2115d1de123efec942 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 11 Jan 2019 16:07:29 -0600 Subject: [PATCH 
21/47] Corrected (I hope) the bit set macro... --- src/aml-layout.h | 10 ++++++---- src/layout_dense.c | 7 ++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index e0ebd805..d2887a8b 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -18,12 +18,14 @@ struct aml_layout_data; #define AML_TYPE_LAYOUT_ORDER (1 << 0) #define AML_TYPE_MAX (1 << 1) -#define AML_TYPE_LAYOUT_ROW_ORDER 0 -#define AML_TYPE_LAYOUT_COLUMN_ORDER 1 +#define AML_TYPE_LAYOUT_ROW_ORDER 1 +#define AML_TYPE_LAYOUT_COLUMN_ORDER 0 #define AML_TYPE_GET(tags, bit) (tags & bit) -#define AML_TYPE_CLEAR(tags, bit) (tags & ~bit) -#define AML_TYPE_SET(tags, bit) (tags | bit) +#define AML_TYPE_CLEAR(tags, bit) (tags &= ~bit) +#define AML_TYPE_SET(tags, bit, value) do { \ + AML_TYPE_CLEAR(tags, bit); \ + if(value) tags |= bit;} while(0) struct aml_layout_ops { diff --git a/src/layout_dense.c b/src/layout_dense.c index 29a7c9c2..61bceefd 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -42,7 +42,8 @@ int aml_layout_native_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); if(type == AML_TYPE_LAYOUT_ROW_ORDER) { - AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ROW_ORDER); + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_ROW_ORDER); layout->ops = &aml_layout_row_ops; for(size_t i = 0; i < ndims; i++) { @@ -56,7 +57,8 @@ int aml_layout_native_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, } else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) { - AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_COLUMN_ORDER); + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_COLUMN_ORDER); layout->ops = &aml_layout_column_ops; memcpy(data->dims, dims, ndims * sizeof(size_t)); memcpy(data->stride, stride, ndims * sizeof(size_t)); @@ -233,7 +235,6 @@ struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_element_size }; - 
/******************************************************************************* * ROW OPERATORS: ******************************************************************************/ From 6ed9d36c984e076826aacd82e9d947f2b1d76139 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 11 Jan 2019 16:07:57 -0600 Subject: [PATCH 22/47] Added a padding layout. --- src/Makefile.am | 5 +- src/aml-layout-pad.h | 58 ++++++++ src/aml.h | 1 + src/layout_pad.c | 309 +++++++++++++++++++++++++++++++++++++++++++ tests/copy.c | 52 ++++++++ 5 files changed, 423 insertions(+), 2 deletions(-) create mode 100644 src/aml-layout-pad.h create mode 100644 src/layout_pad.c diff --git a/src/Makefile.am b/src/Makefile.am index 1cb59c2a..8d5e568d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,7 +11,8 @@ AREA_LINUX_CSOURCES = area_linux.c \ AREA_POSIX_CSOURCES = area_posix.c LAYOUT_CSOURCES = layout.c \ - layout_dense.c + layout_dense.c \ + layout_pad.c TILING_CSOURCES = tiling.c \ tiling_1d.c \ @@ -43,7 +44,7 @@ LIBCSOURCES = aml.c area.c arena.c \ $(LAYOUT_CSOURCES) \ copy.c -LIBHSOURCES = aml.h aml-layout.h aml-layout-dense.h aml-copy.h +LIBHSOURCES = aml.h aml-layout.h aml-layout-dense.h aml-layout-pad.h aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml diff --git a/src/aml-layout-pad.h b/src/aml-layout-pad.h new file mode 100644 index 00000000..8aba36a3 --- /dev/null +++ b/src/aml-layout-pad.h @@ -0,0 +1,58 @@ +#ifndef AML_LAYOUT_PAD_H +#define AML_LAYOUT_PAD_H 1 + +#include + +struct aml_layout_data_pad { + struct aml_layout *target; + size_t ndims; + size_t element_size; + size_t *dims; + size_t *target_dims; + void *neutral; +}; + + +#define AML_LAYOUT_PAD_ALLOCSIZE(ndims, neutral_size) ( \ + sizeof(struct aml_layout) + \ + sizeof(struct aml_layout_data_pad) + \ + 2 * ndims * sizeof(size_t) + \ + neutral_size ) + +#define AML_LAYOUT_PAD_DECL(name, ndims, neutral_size) \ + uint8_t __ ##name## 
_inner_data[ndims * sizeof(size_t) + \ + neutral_size ]; \ + struct aml_layout_data_pad __ ##name## _inner_struct = { \ + NULL, \ + ndims, \ + neutral_size, \ + (size_t *) __ ##name## _inner_data, \ + (size_t *) __ ##name## _inner_data + ndims * sizeof(size_t), \ + (void *) __ ##name## _inner_data + 2 * ndims * sizeof(size_t) \ + }; \ + struct aml_layout name = { \ + 0, \ + NULL, \ + (struct aml_layout_data *)& __ ##name## _inner_struct \ + }; + +int aml_layout_pad_struct_init(struct aml_layout *l, size_t ndims, + size_t element_size, void *data); +int aml_layout_pad_ainit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral); +int aml_layout_pad_vinit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, va_list data); +int aml_layout_pad_init(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, ...); +int aml_layout_pad_acreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral); +int aml_layout_pad_vcreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, va_list data); +int aml_layout_pad_create(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, ...); + +extern struct aml_layout_ops aml_layout_pad_column_ops; +extern struct aml_layout_ops aml_layout_pad_row_ops; +#endif diff --git a/src/aml.h b/src/aml.h index d8cd5407..a194b5ef 100644 --- a/src/aml.h +++ b/src/aml.h @@ -21,6 +21,7 @@ #include "aml-layout.h" #include "aml-layout-dense.h" +#include "aml-layout-pad.h" #include "aml-copy.h" /******************************************************************************* * Forward Declarations: diff --git a/src/layout_pad.c b/src/layout_pad.c new file mode 100644 index 00000000..19a856d0 --- /dev/null +++ b/src/layout_pad.c @@ -0,0 +1,309 @@ +#include + +int aml_layout_pad_struct_init(struct aml_layout *layout, size_t ndims, + size_t element_size, void *memory) +{ + struct aml_layout_data_pad *dataptr; + + 
assert(layout == (struct aml_layout *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout)); + dataptr = memory; + layout->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout_data_pad)); + dataptr->target = NULL; + dataptr->ndims = ndims; + dataptr->element_size = element_size; + dataptr->dims = (size_t *)memory; + dataptr->target_dims = (void *)(dataptr->dims + ndims); + dataptr->neutral = (void *)(dataptr->target_dims + ndims); + return 0; +} + +int aml_layout_pad_ainit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral) +{ + assert(layout != NULL); + assert(layout->data != NULL); + struct aml_layout_data_pad *data = + (struct aml_layout_data_pad *)layout->data; + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + assert(data->ndims == ndims); + assert(data->element_size == element_size); + assert(data->dims); + assert(data->target_dims); + assert(data->neutral); + int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); + if (type == AML_TYPE_LAYOUT_ROW_ORDER) { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_ROW_ORDER); + layout->ops = &aml_layout_pad_row_ops; + for(size_t i = 0; i < ndims; i++) + data->dims[i] = dims[ndims-i-1]; + } else if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_COLUMN_ORDER); + layout->ops = &aml_layout_pad_column_ops; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + } + type = aml_layout_order(target); + if(type == AML_TYPE_LAYOUT_ROW_ORDER) { + size_t target_dims[ndims]; + aml_layout_adims(target, target_dims); + for(size_t i = 0; i < ndims; i++) + data->target_dims[i] = target_dims[ndims-i-1]; + } else if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { + aml_layout_adims(target, data->target_dims); + } + for(size_t i = 0; i < ndims; i++) + assert(data->dims[i] >= data->target_dims[i]); + 
memcpy(data->neutral, neutral, element_size); + data->target = target; + return 0; +} + +int aml_layout_pad_vinit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, va_list ap) +{ + size_t ndims = aml_layout_ndims(target); + size_t dims[ndims]; + for(size_t i = 0; i < ndims; i++) + dims[i] = va_arg(ap, size_t); + void *neutral = va_arg(ap, void *); + return aml_layout_pad_ainit(layout, tags, target, dims, neutral); +} + +int aml_layout_pad_init(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, ...) +{ + int err; + va_list ap; + va_start(ap, target); + err = aml_layout_pad_vinit(layout, tags, target, ap); + va_end(ap); + return err; +} + +int aml_layout_pad_acreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral) +{ + assert(target != NULL); + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, + element_size)); + *layout = (struct aml_layout *)baseptr; + aml_layout_pad_struct_init(*layout, ndims, element_size, baseptr); + return aml_layout_pad_ainit(*layout, tags, target, dims, neutral); +} + +int aml_layout_pad_vcreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, va_list ap) +{ + assert(target != NULL); + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, + element_size)); + *layout = (struct aml_layout *)baseptr; + aml_layout_pad_struct_init(*layout, ndims, element_size, baseptr); + return aml_layout_pad_vinit(*layout, tags, target, ap); +} + +int aml_layout_pad_create(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, ...) 
+{ + int err; + va_list ap; + assert(target != NULL); + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, + element_size)); + *layout = (struct aml_layout *)baseptr; + aml_layout_pad_struct_init(*layout, ndims, element_size, baseptr); + va_start(ap, target); + err = aml_layout_pad_vinit(*layout, tags, target, ap); + va_end(ap); + return err; +} + +/******************************************************************************* + * COLUMN OPERATORS: + ******************************************************************************/ + +void *aml_layout_pad_column_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + for (int i = 0; i < ndims; i++) { + assert(coords[i] < d->dims[i]); + if(coords[i] >= d->target_dims[i]) + return d->neutral; + } + int type = aml_layout_order(d->target); + if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) + return aml_layout_aderef(d->target, coords); + else { + size_t target_coords[ndims]; + for (int i = 0; i < ndims; i++) + target_coords[i] = coords[ndims - i - 1]; + return aml_layout_aderef(d->target, target_coords); + } +} + +void *aml_layout_pad_column_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + size_t target_coords[d->ndims]; + for (int i = 0; i < ndims; i++) + target_coords[i] = va_arg(coords, size_t); + return aml_layout_pad_column_aderef(data, target_coords); +} + +int aml_layout_pad_column_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_COLUMN_ORDER; +} + +int aml_layout_pad_column_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data;
+ assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int aml_layout_pad_column_adims(const struct aml_layout_data *data, + size_t *dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d != NULL); + assert(dims != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t aml_layout_pad_ndims(const struct aml_layout_data *data) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + return d->ndims; +} + +size_t aml_layout_pad_element_size(const struct aml_layout_data *data) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + return d->element_size; +} + +struct aml_layout_ops aml_layout_pad_column_ops = { + aml_layout_pad_column_deref, + aml_layout_pad_column_aderef, + aml_layout_pad_column_order, + aml_layout_pad_column_dims, + aml_layout_pad_column_adims, + aml_layout_pad_ndims, + aml_layout_pad_element_size +}; + +/******************************************************************************* + * ROW OPERATORS: + ******************************************************************************/ + +void *aml_layout_pad_row_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + for (int i = 0; i < ndims; i++) { + assert(coords[ndims - i - 1] < d->dims[i]); + if(coords[ndims - i - 1] >= d->target_dims[i]) + return d->neutral; + } + int type = aml_layout_order(d->target); + if (type == AML_TYPE_LAYOUT_ROW_ORDER) + return aml_layout_aderef(d->target, coords); + else { + size_t target_coords[ndims]; + for (int i = 0; i < ndims; i++) + target_coords[i] = coords[ndims - i - 1]; + return aml_layout_aderef(d->target, target_coords); + } +} + +void
*aml_layout_pad_row_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + size_t target_coords[d->ndims]; + for (int i = 0; i < ndims; i++) + target_coords[i] = va_arg(coords, size_t); + return aml_layout_pad_row_aderef(data, target_coords); +} + +int aml_layout_pad_row_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_ROW_ORDER; +} + +int aml_layout_pad_row_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int aml_layout_pad_row_adims(const struct aml_layout_data *data, size_t *dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 0; +} + +struct aml_layout_ops aml_layout_pad_row_ops = { + aml_layout_pad_row_deref, + aml_layout_pad_row_aderef, + aml_layout_pad_row_order, + aml_layout_pad_row_dims, + aml_layout_pad_row_adims, + aml_layout_pad_ndims, + aml_layout_pad_element_size +}; + diff --git a/tests/copy.c b/tests/copy.c index f9bc6a66..854719fa 100644 --- a/tests/copy.c +++ b/tests/copy.c @@ -962,6 +962,57 @@ void test_copy_layout_generic(void) } +void test_copy_layout_pad_generic(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 1, 1, 1}; + + size_t elem_number2[3] = { 7, 3, 4 }; + + double src[4][6][10]; + double dst[4][6][10]; + double dst_ref[4][6][10]; + + + + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + AML_LAYOUT_PAD_DECL(src_pad, 3, sizeof(double)); + + 
aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number2, + src_stride, src_pitch); + + double neutral = 1337.0; + aml_layout_pad_ainit(&src_pad, AML_TYPE_LAYOUT_COLUMN_ORDER, + &src_layout, elem_number2, (void*)&neutral); + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + dst[k][j][i] = 0.0; + dst_ref[k][j][i] = 0.0; + } + for (int k = 0; k < 4; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 7; i++) + dst_ref[k][j][i] = 1337.0; + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + dst_ref[k][j][i] = src[k][j][i]; + aml_copy_layout_generic(&dst_layout, &src_pad); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(dst_ref[k][j][i] == dst[k][j][i]); +} void test_transpose_layout(void) { size_t elem_number[4] = { 5, 3, 2, 4 }; @@ -1120,6 +1171,7 @@ int main(int argc, char *argv[]) test_copy_sh4dstr_c(); test_copy_layout(); test_copy_layout_generic(); + test_copy_layout_pad_generic(); test_transpose_layout(); test_transpose_layout_generic(); return 0; From 8da06a1b5920c3210e898c05ea3baf4749ac7871 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 11 Jan 2019 18:12:46 -0600 Subject: [PATCH 23/47] Bugfix. 
--- src/aml-layout-pad.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aml-layout-pad.h b/src/aml-layout-pad.h index 8aba36a3..6705375f 100644 --- a/src/aml-layout-pad.h +++ b/src/aml-layout-pad.h @@ -20,15 +20,15 @@ struct aml_layout_data_pad { neutral_size ) #define AML_LAYOUT_PAD_DECL(name, ndims, neutral_size) \ - uint8_t __ ##name## _inner_data[ndims * sizeof(size_t) + \ + uint8_t __ ##name## _inner_data[2 * ndims * sizeof(size_t) + \ neutral_size ]; \ struct aml_layout_data_pad __ ##name## _inner_struct = { \ NULL, \ ndims, \ neutral_size, \ (size_t *) __ ##name## _inner_data, \ - (size_t *) __ ##name## _inner_data + ndims * sizeof(size_t), \ - (void *) __ ##name## _inner_data + 2 * ndims * sizeof(size_t) \ + (size_t *) (__ ##name## _inner_data + ndims * sizeof(size_t)), \ + (void *) (__ ##name## _inner_data + 2 * ndims * sizeof(size_t)) \ }; \ struct aml_layout name = { \ 0, \ From 997b420f962a7a11b374bcc342689f6a54a1f3e9 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Sat, 12 Jan 2019 10:59:53 -0600 Subject: [PATCH 24/47] More rigorous asserts. 
--- src/layout_pad.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/layout_pad.c b/src/layout_pad.c index 19a856d0..5eb58c62 100644 --- a/src/layout_pad.c +++ b/src/layout_pad.c @@ -143,8 +143,9 @@ void *aml_layout_pad_column_aderef(const struct aml_layout_data *data, (const struct aml_layout_data_pad *)data; assert(d !=NULL); size_t ndims = d->ndims; - for (int i = 0; i < ndims; i++) { + for (int i = 0; i < ndims; i++) assert(coords[i] < d->dims[i]); + for (int i = 0; i < ndims; i++) { if(coords[i] >= d->target_dims[i]) return d->neutral; } @@ -237,8 +238,9 @@ void *aml_layout_pad_row_aderef(const struct aml_layout_data *data, (const struct aml_layout_data_pad *)data; assert(d !=NULL); size_t ndims = d->ndims; - for (int i = 0; i < ndims; i++) { + for (int i = 0; i < ndims; i++) assert(coords[ndims - i - 1] < d->dims[i]); + for (int i = 0; i < ndims; i++) { if(coords[ndims - i - 1] >= d->target_dims[i]) return d->neutral; } From a249a8efb7d43d1337391d2ecc8cdfb2d0e9b5ac Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Tue, 15 Jan 2019 14:50:51 -0600 Subject: [PATCH 25/47] Added reshape operation for dense layouts. 
--- src/aml-layout-dense.h | 22 +---- src/aml-layout.h | 8 ++ src/layout.c | 24 ++++++ src/layout_dense.c | 177 +++++++++++++++++++++++++++++++++++++-- src/layout_pad.c | 8 +- tests/layout.c | 182 +++++++++++++++++++++++++++++++++++++++-- 6 files changed, 381 insertions(+), 40 deletions(-) diff --git a/src/aml-layout-dense.h b/src/aml-layout-dense.h index 8ff68402..62fffc70 100644 --- a/src/aml-layout-dense.h +++ b/src/aml-layout-dense.h @@ -30,7 +30,7 @@ struct aml_layout_data_native { #define AML_LAYOUT_NATIVE_ALLOCSIZE(ndims) (sizeof(struct aml_layout) +\ sizeof(struct aml_layout_data_native) +\ - ndims * 4 * sizeof(size_t)) + (ndims * 4 + 1) * sizeof(size_t)) #define AML_LAYOUT_NATIVE_DECL(name, ndims) \ size_t __ ##name## _inner_data[ndims * 4]; \ @@ -69,27 +69,7 @@ int aml_layout_native_vcreate(struct aml_layout **l, uint64_t tags, void *ptr, int aml_layout_native_create(struct aml_layout **l, uint64_t tags, void *ptr, const size_t element_size, size_t ndims, ...); -void *aml_layout_column_deref(const struct aml_layout_data *data, - va_list coords); -void *aml_layout_column_aderef(const struct aml_layout_data *data, - const size_t *coords); -int aml_layout_column_order(const struct aml_layout_data *data); -int aml_layout_column_dims(const struct aml_layout_data *data, va_list dims); -int aml_layout_column_adims(const struct aml_layout_data *data, size_t *dims); -size_t aml_layout_column_ndims(const struct aml_layout_data *data); -size_t aml_layout_column_elem_size(const struct aml_layout_data *data); - extern struct aml_layout_ops aml_layout_column_ops; - -void *aml_layout_row_deref(const struct aml_layout_data *data, va_list coords); -void *aml_layout_row_aderef(const struct aml_layout_data *data, - const size_t *coords); -int aml_layout_row_order(const struct aml_layout_data *data); -int aml_layout_row_dims(const struct aml_layout_data *data, va_list dims); -int aml_layout_row_adims(const struct aml_layout_data *data, size_t *dims); -size_t 
aml_layout_row_ndims(const struct aml_layout_data *data); -size_t aml_layout_row_element_size(const struct aml_layout_data *data); - extern struct aml_layout_ops aml_layout_row_ops; #endif diff --git a/src/aml-layout.h b/src/aml-layout.h index d2887a8b..5a383da2 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -36,6 +36,10 @@ struct aml_layout_ops { int (*adims)(const struct aml_layout_data *, size_t *dims); size_t (*ndims)(const struct aml_layout_data *); size_t (*element_size)(const struct aml_layout_data *); + struct aml_layout * (*reshape)(const struct aml_layout_data *, + size_t ndims, va_list dims); + struct aml_layout * (*areshape)(const struct aml_layout_data *, + size_t ndims, const size_t *dims); }; struct aml_layout { @@ -51,5 +55,9 @@ int aml_layout_dims(const struct aml_layout *l, ...); int aml_layout_adims(const struct aml_layout *l, size_t *dims); size_t aml_layout_ndims(const struct aml_layout *l); size_t aml_layout_element_size(const struct aml_layout *l); +struct aml_layout * aml_layout_areshape(const struct aml_layout *l, + size_t ndims, const size_t *dims); +struct aml_layout * aml_layout_reshape(const struct aml_layout *l, + size_t ndims, ...); #endif diff --git a/src/layout.c b/src/layout.c index 7c1aa163..9a21da26 100644 --- a/src/layout.c +++ b/src/layout.c @@ -63,3 +63,27 @@ size_t aml_layout_element_size(const struct aml_layout *layout) return layout->ops->element_size(layout->data); } +struct aml_layout * aml_layout_areshape(const struct aml_layout *layout, + size_t ndims, const size_t *dims) +{ + assert(ndims != 0); + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->areshape != NULL); + return layout->ops->areshape(layout->data, ndims, dims); +} + +struct aml_layout * aml_layout_reshape(const struct aml_layout *layout, + size_t ndims, ...) 
+{ + assert(ndims != 0); + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->reshape != NULL); + va_list ap; + struct aml_layout *ret; + va_start(ap, ndims); + ret = layout->ops->reshape(layout->data, ndims, ap); + va_end(ap); + return ret; +} diff --git a/src/layout_dense.c b/src/layout_dense.c index 61bceefd..60ff4f23 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -24,6 +24,23 @@ int aml_layout_native_struct_init(struct aml_layout *layout, size_t ndims, return 0; } +static +int aml_layout_native_ainit_cpitch(struct aml_layout *layout, + uint64_t tags, void *ptr, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *cpitch) +{ + struct aml_layout_data_native *data = + (struct aml_layout_data_native *)layout->data; + layout->tags = tags; + data->ptr = ptr; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + memcpy(data->stride, stride, ndims * sizeof(size_t)); + memset(data->pitch, 0, ndims * sizeof(size_t)); + memcpy(data->cpitch, cpitch, (ndims + 1) * sizeof(size_t)); + return 0; +} + int aml_layout_native_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, const size_t element_size, size_t ndims, const size_t *dims, const size_t *stride, @@ -52,7 +69,7 @@ int aml_layout_native_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, data->pitch[i] = pitch[ndims-i-1]; } data->cpitch[0] = element_size; - for(size_t i = 1; i < ndims; i++) + for(size_t i = 1; i <= ndims; i++) data->cpitch[i] = data->cpitch[i-1]*pitch[ndims-i]; } else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) @@ -63,12 +80,8 @@ int aml_layout_native_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, memcpy(data->dims, dims, ndims * sizeof(size_t)); memcpy(data->stride, stride, ndims * sizeof(size_t)); memcpy(data->pitch, pitch, ndims * sizeof(size_t)); - /* pitches are only necessary for ndims-1 dimensions. Since we - * store element size as p->cpitch[0], there's still ndims - * elements in the array. 
- */ data->cpitch[0] = element_size; - for(size_t i = 1; i < ndims; i++) + for(size_t i = 1; i <= ndims; i++) data->cpitch[i] = data->cpitch[i-1]*pitch[i-1]; } return 0; @@ -225,6 +238,105 @@ size_t aml_layout_column_element_size(const struct aml_layout_data *data) return d->cpitch[0]; } +static void merge_dims(size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *cpitch, size_t *new_ndims, + size_t *new_dims, size_t *new_stride, + size_t *new_cpitch) +{ + size_t dim_index = 0; + size_t new_dim_index = 0; + new_dims[new_dim_index] = dims[dim_index]; + new_cpitch[new_dim_index] = cpitch[dim_index]; + new_stride[new_dim_index] = stride[dim_index]; + for (; dim_index < ndims - 1; dim_index++) { + if (dims[dim_index] * stride[dim_index] * cpitch[dim_index] == + cpitch[dim_index + 1] && stride[dim_index + 1] == 1) { + new_dims[new_dim_index] *= dims[dim_index + 1]; + } else { + new_dim_index++; + new_dims[new_dim_index] = dims[dim_index + 1]; + new_cpitch[new_dim_index] = cpitch[dim_index + 1]; + new_stride[new_dim_index] = stride[dim_index + 1]; + } + } + new_cpitch[new_dim_index + 1] = cpitch[dim_index + 1]; + *new_ndims = new_dim_index + 1; +} + +static void +reshape_dims(const struct aml_layout_data_native *d, size_t ndims, + const size_t *dims, size_t *n_stride, size_t *n_cpitch) +{ + size_t m_ndims; + size_t m_dims[d->ndims]; + size_t m_stride[d->ndims]; + size_t m_cpitch[d->ndims + 1]; + + merge_dims(d->ndims, d->dims, d->stride, d->cpitch, + &m_ndims, m_dims, m_stride, m_cpitch); + + size_t m_dim_index = 0; + + n_cpitch[0] = m_cpitch[m_dim_index]; + for (size_t i = 0; i < ndims; i++) { + if (m_dims[m_dim_index] == dims[i]) { + n_stride[i] = m_stride[m_dim_index]; + n_cpitch[i + 1] = m_cpitch[m_dim_index + 1]; + m_dim_index++; + } else if (m_dims[m_dim_index] % dims[i] == 0) { + m_dims[m_dim_index] /= dims[i]; + n_stride[i] = m_stride[m_dim_index]; + n_cpitch[i + 1] = + n_cpitch[i] * dims[i] * m_stride[m_dim_index]; + m_stride[m_dim_index] = 
1; + } else { + assert(0); + } + } +} + +struct aml_layout * +aml_layout_column_areshape(const struct aml_layout_data *data, size_t ndims, + const size_t *dims) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t total_size, new_total_size; + total_size = d->dims[0]; + for (size_t i = 1; i < d->ndims; i++) + total_size *= d->dims[i]; + new_total_size = dims[0]; + for (size_t i = 1; i < ndims; i++) + new_total_size *= dims[i]; + assert(total_size == new_total_size); + + size_t stride[ndims]; + size_t cpitch[ndims + 1]; + reshape_dims(d, ndims, dims, stride, cpitch); + + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + d->ptr, ndims, dims, stride, cpitch); + layout->ops = &aml_layout_column_ops; + + return layout; +} + +struct aml_layout * +aml_layout_column_reshape(const struct aml_layout_data *data, size_t ndims, + va_list dims) +{ + size_t n_dims[ndims]; + for (int i = 0; i < ndims; i++) { + n_dims[i] = va_arg(dims, size_t); + } + return aml_layout_column_areshape(data, ndims, n_dims); +} + struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_deref, aml_layout_column_aderef, @@ -232,7 +344,9 @@ struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_dims, aml_layout_column_adims, aml_layout_column_ndims, - aml_layout_column_element_size + aml_layout_column_element_size, + aml_layout_column_reshape, + aml_layout_column_areshape }; /******************************************************************************* @@ -321,6 +435,51 @@ size_t aml_layout_row_element_size(const struct aml_layout_data *data) return d->cpitch[0]; } +struct aml_layout * +aml_layout_row_areshape(const struct aml_layout_data *data, size_t ndims, + const size_t *dims) +{ + const struct aml_layout_data_native *d = + (const
struct aml_layout_data_native *)data; + size_t total_size, new_total_size; + total_size = d->dims[0]; + for (size_t i = 1; i < d->ndims; i++) + total_size *= d->dims[i]; + new_total_size = dims[0]; + for (size_t i = 1; i < ndims; i++) + new_total_size *= dims[i]; + assert(total_size == new_total_size); + + size_t n_dims[ndims]; + for (int i = 0; i < ndims; i++) + n_dims[ndims - i - 1] = dims[i]; + + size_t stride[ndims]; + size_t cpitch[ndims + 1]; + reshape_dims(d, ndims, n_dims, stride, cpitch); + + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_ROW_ORDER, + d->ptr, ndims, n_dims, stride, cpitch); + layout->ops = &aml_layout_row_ops; + + return layout; +} + +struct aml_layout * +aml_layout_row_reshape(const struct aml_layout_data *data, size_t ndims, + va_list dims) +{ + size_t n_dims[ndims]; + for (int i = 0; i < ndims; i++) + n_dims[i] = va_arg(dims, size_t); + return aml_layout_row_areshape(data, ndims, n_dims); +} + + struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_deref, aml_layout_row_aderef, @@ -328,6 +487,8 @@ struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_dims, aml_layout_row_adims, aml_layout_row_ndims, - aml_layout_row_element_size + aml_layout_row_element_size, + aml_layout_row_reshape, + aml_layout_row_areshape }; diff --git a/src/layout_pad.c b/src/layout_pad.c index 5eb58c62..00268c4e 100644 --- a/src/layout_pad.c +++ b/src/layout_pad.c @@ -224,7 +224,9 @@ struct aml_layout_ops aml_layout_pad_column_ops = { aml_layout_pad_column_dims, aml_layout_pad_column_adims, aml_layout_pad_ndims, - aml_layout_pad_element_size + aml_layout_pad_element_size, + NULL, + NULL }; /******************************************************************************* @@ -306,6 +308,8 @@ struct aml_layout_ops aml_layout_pad_row_ops = { aml_layout_pad_row_dims,
aml_layout_pad_row_adims, aml_layout_pad_ndims, - aml_layout_pad_element_size + aml_layout_pad_element_size, + NULL, + NULL }; diff --git a/tests/layout.c b/tests/layout.c index 826feb0d..76a4764d 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -1,7 +1,162 @@ #include #include -int main(int argc, char *argv[]) + +void test_reshape_contiguous(void) +{ + int memory[4*5*6]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {1, 1, 1}; + + size_t new_dims_col[2] = {24, 5}; + size_t new_dims_row[2] = {5, 24}; + + int i; + for(i = 0; i < 4*5*6; i++) + memory[i] = i; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + struct aml_layout *b = aml_layout_areshape(a, 2, new_dims_col); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(b)); + + i = 0; + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 24; k++, i++) + assert(i == *(int *)aml_layout_deref(b, k, j)); + + free(a); + free(b); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + b = aml_layout_areshape(a, 2, new_dims_row); + assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(b)); + + i = 0; + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 24; k++, i++) + assert(i == *(int *)aml_layout_deref(b, j, k)); + + free(a); + free(b); +} + +void test_reshape_discontiguous(void) +{ + int memory[7][6][5]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {1, 1, 1}; + + size_t pitch_col[3] = {5, 6, 7}; + size_t pitch_row[3] = {7, 6, 5}; + + size_t new_dims_col[5] = {2, 2, 5, 2, 3}; + size_t new_dims_row[5] = {3, 2, 5, 2, 2}; + + int i = 0; + for(int j = 0; j < 6; j++) + for(int k = 0; k < 5; k++) + for(int l = 0; l < 4; l++, i++) + memory[j][k][l] = i; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void 
*)memory, sizeof(int), 3, dims_col, + stride, pitch_col); + struct aml_layout *b = aml_layout_areshape(a, 5, new_dims_col); + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 5; l++) + for(size_t m = 0; m < 2; m++) + for(size_t n = 0; n < 2; n++, i++) + assert(i == *(int *)aml_layout_deref(b, n, m, l, k, j)); + + free(a); + free(b); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, pitch_row); + b = aml_layout_areshape(a, 5, new_dims_row); + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 5; l++) + for(size_t m = 0; m < 2; m++) + for(size_t n = 0; n < 2; n++, i++) + assert(i == *(int *)aml_layout_deref(b, j, k, l, m, n)); + + free(a); + free(b); +} + +void test_reshape_strided(void) +{ + int memory[12][5][8]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {2, 1, 2}; + + size_t pitch_col[3] = {8, 5, 12}; + size_t pitch_row[3] = {12, 5, 8}; + + size_t new_dims_col[4] = {2, 10, 2, 3}; + size_t new_dims_row[4] = {3, 2, 10, 2}; + + int i = 0; + for(int j = 0; j < 6; j++) + for(int k = 0; k < 5; k++) + for(int l = 0; l < 4; l++, i++) + memory[2*j][1*k][2*l] = i; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, pitch_col); + struct aml_layout *b = aml_layout_areshape(a, 4, new_dims_col); + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 10; l++) + for(size_t m = 0; m < 2; m++, i++) + assert(i == *(int *)aml_layout_deref(b, m, l, k, j)); + + free(a); + free(b); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, pitch_row); + b = aml_layout_areshape(a, 4, new_dims_row); + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 
10; l++) + for(size_t m = 0; m < 2; m++, i++) + assert(i == *(int *)aml_layout_deref(b, j, k, l, m)); + + free(a); + free(b); +} + +void test_base(void) { struct aml_layout *a; AML_LAYOUT_NATIVE_DECL(b, 5); @@ -9,7 +164,7 @@ int main(int argc, char *argv[]) /* padd the dims to the closest multiple of 2 */ float memory[16][12][8][8][4]; size_t pitch[5] = {4, 8, 8, 12, 16}; - size_t cpitch[5] = {4, 4*4, 4*4*8, 4*4*8*8, 4*4*8*8*12}; + size_t cpitch[6] = {4, 4*4, 4*4*8, 4*4*8*8, 4*4*8*8*12, 4*4*8*8*12*16}; size_t dims[5] = {2, 3, 7, 11, 13}; size_t stride[5] = {1, 2, 1, 1, 1}; @@ -25,8 +180,6 @@ int main(int argc, char *argv[]) for(size_t i = 0; i < 4*8*8*12*16; i++) ((float*)(&memory[0][0][0][0][0]))[i] = (float)i; - /* library initialization */ - aml_init(&argc, &argv); /* initialize column order layouts */ aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, @@ -52,11 +205,11 @@ int main(int argc, char *argv[]) assert(!memcmp(adataptr->dims, dims, sizeof(size_t)*5)); assert(!memcmp(adataptr->stride, stride, sizeof(size_t)*5)); assert(!memcmp(adataptr->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*6)); assert(!memcmp(bdataptr->dims, dims, sizeof(size_t)*5)); assert(!memcmp(bdataptr->stride, stride, sizeof(size_t)*5)); assert(!memcmp(bdataptr->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*6)); /* test column major subroutines */ size_t dims_res[5]; @@ -105,13 +258,13 @@ int main(int argc, char *argv[]) assert(!memcmp(adataptr->dims, dims, sizeof(size_t)*5)); assert(!memcmp(adataptr->stride, stride, sizeof(size_t)*5)); assert(!memcmp(adataptr->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*6)); assert(!memcmp(bdataptr->dims, dims, 
sizeof(size_t)*5)); assert(!memcmp(bdataptr->stride, stride, sizeof(size_t)*5)); assert(!memcmp(bdataptr->pitch, pitch, sizeof(size_t)*5)); - assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*6)); - /* test column major subroutines */ + /* test row major subroutines */ size_t coords_test_row[5] = { 5, 4, 3, 2, 1 }; aml_layout_adims(a, dims_res); assert(!memcmp(dims_res, dims_row, sizeof(size_t)*5)); @@ -132,7 +285,18 @@ int main(int argc, char *argv[]) assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(a)); free(a); +} +int main(int argc, char *argv[]) +{ + /* library initialization */ + aml_init(&argc, &argv); + + test_base(); + test_reshape_contiguous(); + test_reshape_discontiguous(); + test_reshape_strided(); aml_finalize(); return 0; } + From 3d4fdf0ae164a5cdad1134e30ed153f7b85dc638 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Tue, 15 Jan 2019 15:15:27 -0600 Subject: [PATCH 26/47] Bugfix --- src/aml-layout-dense.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aml-layout-dense.h b/src/aml-layout-dense.h index 62fffc70..d7782cdd 100644 --- a/src/aml-layout-dense.h +++ b/src/aml-layout-dense.h @@ -33,7 +33,7 @@ struct aml_layout_data_native { (ndims * 4 + 1) * sizeof(size_t)) #define AML_LAYOUT_NATIVE_DECL(name, ndims) \ - size_t __ ##name## _inner_data[ndims * 4]; \ + size_t __ ##name## _inner_data[ndims * 4 + 1]; \ struct aml_layout_data_native __ ##name## _inner_struct = { \ NULL, \ ndims, \ From 9d72fde5d743822f4ea620a359df59a6f5f0cf7a Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Wed, 16 Jan 2019 14:40:13 -0600 Subject: [PATCH 27/47] Added a reshaping layout to be used on padding layouts or as a fallback... 
--- src/Makefile.am | 5 +- src/aml-layout-reshape.h | 60 +++++++ src/aml.h | 1 + src/layout_pad.c | 12 +- src/layout_reshape.c | 349 +++++++++++++++++++++++++++++++++++++++ tests/layout.c | 48 +++++- 6 files changed, 462 insertions(+), 13 deletions(-) create mode 100644 src/aml-layout-reshape.h create mode 100644 src/layout_reshape.c diff --git a/src/Makefile.am b/src/Makefile.am index 8d5e568d..8ce51dc2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,8 @@ AREA_POSIX_CSOURCES = area_posix.c LAYOUT_CSOURCES = layout.c \ layout_dense.c \ - layout_pad.c + layout_pad.c \ + layout_reshape.c TILING_CSOURCES = tiling.c \ tiling_1d.c \ @@ -44,7 +45,7 @@ LIBCSOURCES = aml.c area.c arena.c \ $(LAYOUT_CSOURCES) \ copy.c -LIBHSOURCES = aml.h aml-layout.h aml-layout-dense.h aml-layout-pad.h aml-copy.h +LIBHSOURCES = aml.h aml-layout.h aml-layout-dense.h aml-layout-pad.h aml-layout-reshape.h aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml diff --git a/src/aml-layout-reshape.h b/src/aml-layout-reshape.h new file mode 100644 index 00000000..c8207c0c --- /dev/null +++ b/src/aml-layout-reshape.h @@ -0,0 +1,60 @@ +#ifndef AML_LAYOUT_RESHAPE_H +#define AML_LAYOUT_RESHAPE_H + +#include + +struct aml_layout_data_reshape { + struct aml_layout *target; + size_t ndims; + size_t target_ndims; + size_t *dims; + size_t *coffsets; + size_t *target_dims; + size_t *target_coffsets; +}; + +#define AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, target_ndims) ( \ + sizeof(struct aml_layout) + \ + sizeof(struct aml_layout_data_reshape) + \ + 2 * ndims * sizeof(size_t) + \ + target_ndims * sizeof(size_t) ) + +#define AML_LAYOUT_RESHAPE_DECL(name, ndims, target_ndims) \ + size_t __ ##name## _inner_data[ 2 * ndims + target_ndims]; \ + struct aml_layout_data_reshape __ ##name## _inner_struct = { \ + NULL, \ + ndims, \ + target_ndims, \ + __ ##name## _inner_data, \ + __ ##name## _inner_data + ndims, \ + __ ##name##
_inner_data + 2 * ndims \ + }; \ + struct aml_layout name = { \ + 0, \ + NULL, \ + (struct aml_layout_data *)& __ ##name## _inner_struct \ + }; + +int aml_layout_reshape_struct_init(struct aml_layout *l, size_t ndims, + void *data); +int aml_layout_reshape_ainit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims); +int aml_layout_reshape_vinit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, size_t ndims, + va_list data); +int aml_layout_reshape_init(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, size_t ndims, ...); +int aml_layout_reshape_acreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims); +int aml_layout_reshape_vcreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, size_t ndims, + va_list data); +int aml_layout_reshape_create(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, size_t ndims, ...); + +extern struct aml_layout_ops aml_layout_reshape_column_ops; +extern struct aml_layout_ops aml_layout_reshape_row_ops; + +#endif diff --git a/src/aml.h b/src/aml.h index a194b5ef..87ea0260 100644 --- a/src/aml.h +++ b/src/aml.h @@ -22,6 +22,7 @@ #include "aml-layout.h" #include "aml-layout-dense.h" #include "aml-layout-pad.h" +#include "aml-layout-reshape.h" #include "aml-copy.h" /******************************************************************************* * Forward Declarations: diff --git a/src/layout_pad.c b/src/layout_pad.c index 00268c4e..66b91f07 100644 --- a/src/layout_pad.c +++ b/src/layout_pad.c @@ -6,8 +6,7 @@ int aml_layout_pad_struct_init(struct aml_layout *layout, size_t ndims, struct aml_layout_data_pad *dataptr; assert(layout == (struct aml_layout *)memory); - memory = (void *)((uintptr_t)memory + - sizeof(struct aml_layout)); + memory = (void *)((uintptr_t)memory + sizeof(struct aml_layout)); dataptr = memory; layout->data = memory; memory = (void *)((uintptr_t)memory + 
@@ -16,7 +15,7 @@ int aml_layout_pad_struct_init(struct aml_layout *layout, size_t ndims, dataptr->ndims = ndims; dataptr->element_size = element_size; dataptr->dims = (size_t *)memory; - dataptr->target_dims = (void *)(dataptr->dims + ndims); + dataptr->target_dims = dataptr->dims + ndims; dataptr->neutral = (void *)(dataptr->target_dims + ndims); return 0; } @@ -92,6 +91,7 @@ int aml_layout_pad_acreate(struct aml_layout **layout, uint64_t tags, void *neutral) { assert(target != NULL); + assert(target->ops != NULL); size_t ndims = aml_layout_ndims(target); size_t element_size = aml_layout_element_size(target); void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, @@ -105,6 +105,7 @@ int aml_layout_pad_vcreate(struct aml_layout **layout, uint64_t tags, struct aml_layout *target, va_list ap) { assert(target != NULL); + assert(target->ops != NULL); size_t ndims = aml_layout_ndims(target); size_t element_size = aml_layout_element_size(target); void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, @@ -120,6 +121,7 @@ int aml_layout_pad_create(struct aml_layout **layout, uint64_t tags, int err; va_list ap; assert(target != NULL); + assert(target->ops != NULL); size_t ndims = aml_layout_ndims(target); size_t element_size = aml_layout_element_size(target); void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, @@ -137,7 +139,7 @@ int aml_layout_pad_create(struct aml_layout **layout, uint64_t tags, ******************************************************************************/ void *aml_layout_pad_column_aderef(const struct aml_layout_data *data, - const size_t *coords) + const size_t *coords) { const struct aml_layout_data_pad *d = (const struct aml_layout_data_pad *)data; @@ -161,7 +163,7 @@ void *aml_layout_pad_column_aderef(const struct aml_layout_data *data, } void *aml_layout_pad_column_deref(const struct aml_layout_data *data, - va_list coords) + va_list coords) { const struct aml_layout_data_pad *d = (const struct aml_layout_data_pad *)data; diff --git 
a/src/layout_reshape.c b/src/layout_reshape.c new file mode 100644 index 00000000..044f94f0 --- /dev/null +++ b/src/layout_reshape.c @@ -0,0 +1,349 @@ +#include + +int aml_layout_reshape_struct_init(struct aml_layout *layout, size_t ndims, + void *memory) +{ + struct aml_layout_data_reshape *dataptr; + + assert(layout == (struct aml_layout *)memory); + memory = (void *)((uintptr_t)memory + sizeof(struct aml_layout)); + dataptr = memory; + layout->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout_data_reshape)); + dataptr->target = NULL; + dataptr->ndims = ndims; + dataptr->dims = (size_t *)memory; + dataptr->coffsets = dataptr->dims + ndims; + dataptr->target_dims = dataptr->dims + 2 * ndims; + return 0; +} + +int aml_layout_reshape_ainit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims) +{ + assert(layout != NULL); + assert(layout->data != NULL); + struct aml_layout_data_reshape *data = + (struct aml_layout_data_reshape *)layout->data; + size_t target_ndims = aml_layout_ndims(target); + assert(ndims != 0); + assert(data->ndims == ndims); + assert(data->dims); + assert(data->coffsets); + assert(data->target_dims); + data->target_ndims = target_ndims; + data->target = target; + assert(data->target_ndims != 0); + int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); + if (type == AML_TYPE_LAYOUT_ROW_ORDER) { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_ROW_ORDER); + layout->ops = &aml_layout_reshape_row_ops; + for(size_t i = 0; i < ndims; i++) + data->dims[i] = dims[ndims-i-1]; + } else { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_COLUMN_ORDER); + layout->ops = &aml_layout_reshape_column_ops; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + } + type = aml_layout_order(target); + if(type == AML_TYPE_LAYOUT_ROW_ORDER) { + size_t target_dims[target_ndims]; + aml_layout_adims(target, target_dims); + for(size_t i = 0; i < 
target_ndims; i++) + data->target_dims[i] = target_dims[target_ndims-i-1]; + } else { + aml_layout_adims(target, data->target_dims); + } + size_t prod, target_prod; + prod = 1; + for(size_t i = 0; i < ndims; i++) { + data->coffsets[i] = prod; + prod *= data->dims[i]; + } + target_prod = 1; + for(size_t i = 0; i < data->target_ndims; i++) + target_prod *= data->target_dims[i]; + assert(target_prod == prod); + return 0; +} + +int aml_layout_reshape_vinit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, size_t ndims, + va_list data) +{ + size_t dims[ndims]; + for(size_t i = 0; i < ndims; i++) + dims[i] = va_arg(data, size_t); + return aml_layout_reshape_ainit(layout, tags, target, ndims, dims); +} + +int aml_layout_reshape_init(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, size_t ndims, ...) +{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_layout_reshape_vinit(layout, tags, target, ndims, ap); + va_end(ap); + return err; +} + +int aml_layout_reshape_acreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims) +{ + assert(target != NULL); + assert(target->ops != NULL); + size_t target_ndims = aml_layout_ndims(target); + void *baseptr = calloc(1, AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, + target_ndims)); + assert(baseptr != NULL); + *layout = (struct aml_layout *)baseptr; + aml_layout_reshape_struct_init(*layout, ndims, baseptr); + return aml_layout_reshape_ainit(*layout, tags, target, ndims, dims); +} + +int aml_layout_reshape_vcreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, size_t ndims, + va_list data) +{ + assert(target != NULL); + assert(target->ops != NULL); + size_t target_ndims = aml_layout_ndims(target); + void *baseptr = calloc(1, AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, + target_ndims)); + assert(baseptr != NULL); + *layout = (struct aml_layout *)baseptr; + aml_layout_reshape_struct_init(*layout, ndims, baseptr); + return 
aml_layout_reshape_vinit(*layout, tags, target, ndims, data); +} + +int aml_layout_reshape_create(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, size_t ndims, ...) +{ + int err; + va_list data; + assert(target != NULL); + assert(target->ops != NULL); + size_t target_ndims = aml_layout_ndims(target); + void *baseptr = calloc(1, AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, + target_ndims)); + assert(baseptr != NULL); + *layout = (struct aml_layout *)baseptr; + aml_layout_reshape_struct_init(*layout, ndims, baseptr); + va_start(data, ndims); + err = aml_layout_reshape_vinit(*layout, tags, target, ndims, data); + va_end(data); + return err; +} + +/******************************************************************************* + * COLUMN OPERATORS: + ******************************************************************************/ + +void *aml_layout_reshape_column_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d !=NULL); + + size_t ndims = d->ndims; + + for (int i = 0; i < ndims; i++) + assert(coords[i] < d->dims[i]); + + size_t target_ndims = d->target_ndims; + size_t offset = 0; + size_t remainder; + size_t target_coords[target_ndims]; + + for (int i = 0; i < ndims; i++) + offset += coords[i] * d->coffsets[i]; + + int type = aml_layout_order(d->target); + if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { + for (int i = 0; i < target_ndims; i++) { + target_coords[i] = offset % d->target_dims[i]; + offset /= d->target_dims[i]; + } + } else { + for (int i = 0; i < target_ndims; i++) { + target_coords[target_ndims - i - 1] = + offset % d->target_dims[i]; + offset /= d->target_dims[i]; + } + } + return aml_layout_aderef(d->target, target_coords); +} + +void *aml_layout_reshape_column_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + 
assert(d !=NULL); + size_t target_coords[d->ndims]; + for (int i = 0; i < d->ndims; i++) + target_coords[i] = va_arg(coords, size_t); + aml_layout_reshape_column_aderef(data, target_coords); +} + +int aml_layout_reshape_column_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_COLUMN_ORDER; +} + +int aml_layout_reshape_column_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int aml_layout_reshape_column_adims(const struct aml_layout_data *data, + size_t *dims) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + assert(dims != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t aml_layout_reshape_ndims(const struct aml_layout_data *data) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + return d->ndims; +} + +size_t aml_layout_reshape_element_size(const struct aml_layout_data *data) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + return aml_layout_element_size(d->target); +} + +struct aml_layout_ops aml_layout_reshape_column_ops = { + aml_layout_reshape_column_deref, + aml_layout_reshape_column_aderef, + aml_layout_reshape_column_order, + aml_layout_reshape_column_dims, + aml_layout_reshape_column_adims, + aml_layout_reshape_ndims, + aml_layout_reshape_element_size, + NULL, + NULL +}; + +/******************************************************************************* + * ROW OPERATORS: + ******************************************************************************/ + +void *aml_layout_reshape_row_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct 
aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d !=NULL); + + size_t ndims = d->ndims; + + for (int i = 0; i < ndims; i++) + assert(coords[ndims - i - 1] < d->dims[i]); + + size_t target_ndims = d->target_ndims; + size_t offset = 0; + size_t remainder; + size_t target_coords[target_ndims]; + + for (int i = 0; i < ndims; i++) + offset += coords[ndims - i - 1] * d->coffsets[i]; + + int type = aml_layout_order(d->target); + if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { + for (int i = 0; i < target_ndims; i++) { + target_coords[i] = offset % d->target_dims[i]; + offset /= d->target_dims[i]; + } + } else { + for (int i = 0; i < target_ndims; i++) { + target_coords[target_ndims - i - 1] = + offset % d->target_dims[i]; + offset /= d->target_dims[i]; + } + } + return aml_layout_aderef(d->target, target_coords); +} + +void *aml_layout_reshape_row_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d !=NULL); + size_t target_coords[d->ndims]; + for (int i = 0; i < d->ndims; i++) + target_coords[i] = va_arg(coords, size_t); + aml_layout_reshape_row_aderef(data, target_coords); +} + +int aml_layout_reshape_row_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_ROW_ORDER; +} + +int aml_layout_reshape_row_dims(const struct aml_layout_data *data, + va_list dims) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int aml_layout_reshape_row_adims(const struct aml_layout_data *data, + size_t *dims) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + dims[i] = d->dims[d->ndims - i - 1]; + 
} + return 0; +} + +struct aml_layout_ops aml_layout_reshape_row_ops = { + aml_layout_reshape_row_deref, + aml_layout_reshape_row_aderef, + aml_layout_reshape_row_order, + aml_layout_reshape_row_dims, + aml_layout_reshape_row_adims, + aml_layout_reshape_ndims, + aml_layout_reshape_element_size, + NULL, + NULL +}; + + diff --git a/tests/layout.c b/tests/layout.c index 76a4764d..ed1edd27 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -24,28 +24,41 @@ void test_reshape_contiguous(void) stride, dims_col); struct aml_layout *b = aml_layout_areshape(a, 2, new_dims_col); assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(b)); + struct aml_layout *c; + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_COLUMN_ORDER, + a, 2, new_dims_col); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(c)); i = 0; for(size_t j = 0; j < 5; j++) - for(size_t k = 0; k < 24; k++, i++) + for(size_t k = 0; k < 24; k++, i++) { assert(i == *(int *)aml_layout_deref(b, k, j)); + assert(i == *(int *)aml_layout_deref(c, k, j)); + } free(a); free(b); + free(c); aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, sizeof(int), 3, dims_row, stride, dims_row); b = aml_layout_areshape(a, 2, new_dims_row); assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(b)); + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_ROW_ORDER, + a, 2, new_dims_row); + assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(c)); i = 0; for(size_t j = 0; j < 5; j++) - for(size_t k = 0; k < 24; k++, i++) + for(size_t k = 0; k < 24; k++, i++) { assert(i == *(int *)aml_layout_deref(b, j, k)); + assert(i == *(int *)aml_layout_deref(c, j, k)); + } free(a); free(b); + free(c); } void test_reshape_discontiguous(void) @@ -74,33 +87,45 @@ void test_reshape_discontiguous(void) (void *)memory, sizeof(int), 3, dims_col, stride, pitch_col); struct aml_layout *b = aml_layout_areshape(a, 5, new_dims_col); + struct aml_layout *c; + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_COLUMN_ORDER, + a, 5, new_dims_col); 
i = 0; for(size_t j = 0; j < 3; j++) for(size_t k = 0; k < 2; k++) for(size_t l = 0; l < 5; l++) for(size_t m = 0; m < 2; m++) - for(size_t n = 0; n < 2; n++, i++) + for(size_t n = 0; n < 2; n++, i++) { assert(i == *(int *)aml_layout_deref(b, n, m, l, k, j)); + assert(i == *(int *)aml_layout_deref(c, n, m, l, k, j)); + } free(a); free(b); + free(c); aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, sizeof(int), 3, dims_row, stride, pitch_row); b = aml_layout_areshape(a, 5, new_dims_row); + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_ROW_ORDER, + a, 5, new_dims_row); + i = 0; for(size_t j = 0; j < 3; j++) for(size_t k = 0; k < 2; k++) for(size_t l = 0; l < 5; l++) for(size_t m = 0; m < 2; m++) - for(size_t n = 0; n < 2; n++, i++) + for(size_t n = 0; n < 2; n++, i++) { assert(i == *(int *)aml_layout_deref(b, j, k, l, m, n)); + assert(i == *(int *)aml_layout_deref(c, j, k, l, m, n)); + } free(a); free(b); + free(c); } void test_reshape_strided(void) @@ -129,31 +154,42 @@ void test_reshape_strided(void) (void *)memory, sizeof(int), 3, dims_col, stride, pitch_col); struct aml_layout *b = aml_layout_areshape(a, 4, new_dims_col); + struct aml_layout *c; + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_COLUMN_ORDER, + a, 4, new_dims_col); i = 0; for(size_t j = 0; j < 3; j++) for(size_t k = 0; k < 2; k++) for(size_t l = 0; l < 10; l++) - for(size_t m = 0; m < 2; m++, i++) + for(size_t m = 0; m < 2; m++, i++) { assert(i == *(int *)aml_layout_deref(b, m, l, k, j)); + assert(i == *(int *)aml_layout_deref(c, m, l, k, j)); + } free(a); free(b); + free(c); aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, sizeof(int), 3, dims_row, stride, pitch_row); b = aml_layout_areshape(a, 4, new_dims_row); + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_ROW_ORDER, + a, 4, new_dims_row); i = 0; for(size_t j = 0; j < 3; j++) for(size_t k = 0; k < 2; k++) for(size_t l = 0; l < 10; l++) - for(size_t m = 0; m < 2; m++, i++) + for(size_t m = 0; m < 
2; m++, i++) { assert(i == *(int *)aml_layout_deref(b, j, k, l, m)); + assert(i == *(int *)aml_layout_deref(c, j, k, l, m)); + } free(a); free(b); + free(c); } void test_base(void) From 5e6148d60dcddbc16c6736790b1ac13e23dde73c Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Wed, 16 Jan 2019 14:49:33 -0600 Subject: [PATCH 28/47] Bugfix --- src/layout_reshape.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/layout_reshape.c b/src/layout_reshape.c index 044f94f0..321b28c6 100644 --- a/src/layout_reshape.c +++ b/src/layout_reshape.c @@ -190,7 +190,7 @@ void *aml_layout_reshape_column_deref(const struct aml_layout_data *data, size_t target_coords[d->ndims]; for (int i = 0; i < d->ndims; i++) target_coords[i] = va_arg(coords, size_t); - aml_layout_reshape_column_aderef(data, target_coords); + return aml_layout_reshape_column_aderef(data, target_coords); } int aml_layout_reshape_column_order(const struct aml_layout_data *data) @@ -298,7 +298,7 @@ void *aml_layout_reshape_row_deref(const struct aml_layout_data *data, size_t target_coords[d->ndims]; for (int i = 0; i < d->ndims; i++) target_coords[i] = va_arg(coords, size_t); - aml_layout_reshape_row_aderef(data, target_coords); + return aml_layout_reshape_row_aderef(data, target_coords); } int aml_layout_reshape_row_order(const struct aml_layout_data *data) From a670594bfdc229cd6425302cdbdc87785665f6c9 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 25 Jan 2019 18:10:23 -0600 Subject: [PATCH 29/47] Added dims accessor in column order for layout. --- src/aml-layout.h | 3 ++- src/layout_dense.c | 2 ++ src/layout_pad.c | 2 ++ src/layout_reshape.c | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index 5a383da2..fc068be6 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -16,7 +16,7 @@ struct aml_layout_data; /* Layout type tags. Defined as the bit offset to set to one. 
*/ #define AML_TYPE_LAYOUT_ORDER (1 << 0) -#define AML_TYPE_MAX (1 << 1) +#define AML_TYPE_LAYOUT_MAX (1 << 1) #define AML_TYPE_LAYOUT_ROW_ORDER 1 #define AML_TYPE_LAYOUT_COLUMN_ORDER 0 @@ -34,6 +34,7 @@ struct aml_layout_ops { int (*order)(const struct aml_layout_data *); int (*dims)(const struct aml_layout_data *, va_list dim_ptrs); int (*adims)(const struct aml_layout_data *, size_t *dims); + int (*adims_column)(const struct aml_layout_data *, size_t *dims); size_t (*ndims)(const struct aml_layout_data *); size_t (*element_size)(const struct aml_layout_data *); struct aml_layout * (*reshape)(const struct aml_layout_data *, diff --git a/src/layout_dense.c b/src/layout_dense.c index 60ff4f23..7779ed88 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -343,6 +343,7 @@ struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_order, aml_layout_column_dims, aml_layout_column_adims, + aml_layout_column_adims, aml_layout_column_ndims, aml_layout_column_element_size, aml_layout_column_reshape, @@ -486,6 +487,7 @@ struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_order, aml_layout_row_dims, aml_layout_row_adims, + aml_layout_column_adims, aml_layout_row_ndims, aml_layout_row_element_size, aml_layout_row_reshape, diff --git a/src/layout_pad.c b/src/layout_pad.c index 66b91f07..2d2fa5a9 100644 --- a/src/layout_pad.c +++ b/src/layout_pad.c @@ -225,6 +225,7 @@ struct aml_layout_ops aml_layout_pad_column_ops = { aml_layout_pad_column_order, aml_layout_pad_column_dims, aml_layout_pad_column_adims, + aml_layout_pad_column_adims, aml_layout_pad_ndims, aml_layout_pad_element_size, NULL, @@ -309,6 +310,7 @@ struct aml_layout_ops aml_layout_pad_row_ops = { aml_layout_pad_row_order, aml_layout_pad_row_dims, aml_layout_pad_row_adims, + aml_layout_pad_column_adims, aml_layout_pad_ndims, aml_layout_pad_element_size, NULL, diff --git a/src/layout_reshape.c b/src/layout_reshape.c index 321b28c6..b5177fc4 100644 --- a/src/layout_reshape.c +++ 
b/src/layout_reshape.c @@ -243,6 +243,7 @@ struct aml_layout_ops aml_layout_reshape_column_ops = { aml_layout_reshape_column_order, aml_layout_reshape_column_dims, aml_layout_reshape_column_adims, + aml_layout_reshape_column_adims, aml_layout_reshape_ndims, aml_layout_reshape_element_size, NULL, @@ -340,6 +341,7 @@ struct aml_layout_ops aml_layout_reshape_row_ops = { aml_layout_reshape_row_order, aml_layout_reshape_row_dims, aml_layout_reshape_row_adims, + aml_layout_reshape_column_adims, aml_layout_reshape_ndims, aml_layout_reshape_element_size, NULL, From 695d6413d60bcbc1dc05af71ad966cd431834140 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 25 Jan 2019 18:15:19 -0600 Subject: [PATCH 30/47] WIP --- src/aml-tiling-resize.h | 40 +++++++++++++ src/aml-tiling.h | 44 +++++++++++++++ src/tiling_nd.c | 72 ++++++++++++++++++++++++ src/tiling_nd_resize.c | 121 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 277 insertions(+) create mode 100644 src/aml-tiling-resize.h create mode 100644 src/aml-tiling.h create mode 100644 src/tiling_nd.c create mode 100644 src/tiling_nd_resize.c diff --git a/src/aml-tiling-resize.h b/src/aml-tiling-resize.h new file mode 100644 index 00000000..0e9bc227 --- /dev/null +++ b/src/aml-tiling-resize.h @@ -0,0 +1,40 @@ +#ifndef AML_TILING_RESIZE_H +#define AML_TILING_RESIZE_H + +#include + +struct aml_tiling_nd_data_resize { + struct aml_layout *l; + size_t ndims; + size_t *tile_dims; + size_t *dims; + size_t *border_tile_dims; +} + +#define AML_TILING_RESIZE_ALLOCSIZE(ndims) (sizeof(struct aml_tiling_nd) +\ + sizeof(struct aml_tiling_nd_resize) +\ + (ndims * 3) * sizeof(size_t)) + +int aml_tiling_nd_resize_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *data); +int aml_tiling_nd_resize_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_resize_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + 
va_list data); +int aml_tiling_nd_resize_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); +int aml_tiling_nd_resize_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_resize_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_resize_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); + +extern struct aml_tiling_nd_ops aml_tiling_nd_resize_column_ops; +extern struct aml_tiling_nd_ops aml_tiling_nd_resize_row_ops; + +#endif diff --git a/src/aml-tiling.h b/src/aml-tiling.h new file mode 100644 index 00000000..c559c6af --- /dev/null +++ b/src/aml-tiling.h @@ -0,0 +1,44 @@ +#ifndef AML_TILING_H +#define AML_TILING_H 1 + +#include + +struct aml_tiling_nd; +struct aml_tiling_nd_data; + +#define AML_TYPE_TILING_ORDER (1 << 0) +#define AML_TYPE_TILING_MAX (1 << 1) + +#define AML_TYPE_LILING_ROW_ORDER 1 +#define AML_TYPE_LILING_COLUMN_ORDER 0 + +struct aml_tiling_nd_ops { + struct aml_layout* (*index)(const struct aml_tiling_nd_data *, + va_list coords); + struct aml_layout* (*aindex)(const struct aml_tiling_nd_data *, + const size_t *coords); + int (*order)(const struct aml_tiling_nd_data *); + int (*tile_dims)(const struct aml_tiling_nd_data *, va_list dim_ptrs); + int (*tile_adims)(const struct aml_tiling_nd_data *, size_t *dims); + int (*dims)(const struct aml_tiling_nd_data *, va_list dim_ptrs); + int (*adims)(const struct aml_tiling_nd_data *, size_t *dims); + size_t (*ndims)(const struct aml_tiling_nd_data *); +}; + +struct aml_tiling_nd { + uint64_t tags; + struct aml_tiling_nd_ops *ops; + struct aml_tiling_nd_data *data; +}; + +struct aml_layout *aml_tiling_nd_index(const struct aml_tiling_nd *t, ...); +struct aml_layout *aml_tiling_nd_aindex(const struct aml_tiling_nd *t, + const size_t *coords); +int 
aml_tiling_nd_order(const struct aml_tiling_nd *t); +int aml_tiling_nd_tile_dims(const struct aml_tiling_nd *t, ...); +int aml_tiling_nd_tile_adims(const struct aml_tiling_nd *t, size_t *dims); +int aml_tiling_nd_dims(const struct aml_tiling_nd *t, ...); +int aml_tiling_nd_adims(const struct aml_tiling_nd *t, size_t *dims); +size_t aml_tiling_nd_ndims(const struct aml_tiling_nd *t); + +#endif diff --git a/src/tiling_nd.c b/src/tiling_nd.c new file mode 100644 index 00000000..b87436d0 --- /dev/null +++ b/src/tiling_nd.c @@ -0,0 +1,72 @@ +#include + +struct aml_layout *aml_tiling_nd_index(const struct aml_tiling_nd *t, ...) +{ + assert(t != NULL); + assert(t->ops != NULL); + va_list ap; + struct aml_layout *ret; + va_start(ap, t); + ret = t->ops->index(t->data, ap); + va_end(ap); + return ret; +} + +struct aml_layout *aml_tiling_nd_aindex(const struct aml_tiling_nd *t, const size_t *coords) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->aindex(t->data, coords); +} + +int aml_tiling_nd_order(const struct aml_tiling_nd *t) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->order(t->data); +} + +int aml_tiling_nd_tile_dims(const struct aml_tiling_nd *t, ...) +{ + assert(t != NULL); + assert(t->ops != NULL); + va_list ap; + int ret; + va_start(ap, t); + ret = t->ops->tile_dims(t->data, ap) + va_end(ap); + return ret; +} + +int aml_tiling_nd_tile_adims(const struct aml_tiling_nd *t, size_t *dims) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->tile_adims(t->data, dims); +} + +int aml_tiling_nd_dims(const struct aml_tiling_nd *t, ...) 
+{ + assert(t != NULL); + assert(t->ops != NULL); + va_list ap; + int ret; + va_start(ap, t); + ret = t->ops->dims(t->data, ap) + va_end(ap); + return ret; +} + +int aml_tiling_nd_adims(const struct aml_tiling_nd *t, size_t *dims) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->adims(t->data, dims); +} + +size_t aml_tiling_nd_ndims(const struct aml_tiling_nd *t) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->ndims(t->data); +} diff --git a/src/tiling_nd_resize.c b/src/tiling_nd_resize.c new file mode 100644 index 00000000..548f5d18 --- /dev/null +++ b/src/tiling_nd_resize.c @@ -0,0 +1,121 @@ +#include + +int aml_tiling_nd_resize_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *memory) +{ + struct aml_tiling_nd_data_resize *dataptr; + + assert(t == (struct aml_tiling_nd *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd)); + dataptr = memory; + t->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd_data_resize)); + dataptr->l = NULL; + dataptr->ndims = ndims; + dataptr->tile_dims = (size_t *)memory; + dataptr->dims = dataptr->tile_dims + ndims; + dataptr->border_tile_dims = dataptr->dims + ndims; + return 0; +} + +int aml_tiling_nd_resize_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(t != NULL); + assert(t->data != NULL); + struct aml_tiling_nd_data_resize *data = + (struct aml_layout_data_native *)t->data; + assert(data->ndims == ndims); + assert(data->tile_dims); + assert(data->dims); + assert(data->border_tile_dims); + data->l = l; + int type = AML_TYPE_GET(tags, AML_TYPE_TILING_ORDER); + if (type == AML_TYPE_TILING_ROW_ORDER) { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_ROW_ORDER); + t->ops = &aml_tiling_nd_resize_row_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[ndims-i-1]; + } else { + AML_TYPE_SET(t->tags, 
AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_COLUMN_ORDER); + t->ops = &aml_tiling_nd_resize_column_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[i]; + } + size_t target_dims[ndims]; + aml_layout_adims_column(l, target_dims); + for (size_t i = 0; i < ndims; i++) + data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; + data->dims[i] = target_dims[i] / data->tile_dims[i]; + if (data->border_tile_dims[i] == 0) + data->border_tile_dims[i] = target_dims[i]; + else + data->dims[i] += 1; + } + return 0; +} + +int aml_tiling_nd_resize_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + size_t tile_dims[ndims]; + for(size_t i = 0; i < ndims; i++) + tile_dims[i] = va_arg(ap, size_t); + return aml_tiling_nd_resize_ainit(t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_resize_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) +{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_tiling_nd_resize_vinit(t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +int aml_tiling_nd_resize_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_RESIZE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_resize_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_resize_ainit(*t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_resize_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_RESIZE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_resize_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_resize_vinit(*t, tags, l, ndims, data); +} + +int aml_tiling_nd_resize_create(struct aml_tiling_nd **t, uint64_t tags, + const struct 
aml_layout *l, size_t ndims, ...) +{ + int err; + va_list ap; + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_RESIZE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_resize_struct_init(*t, ndims, baseptr); + va_start(ap, ndims); + err = aml_tiling_nd_resize_vinit(*t, tags, l, ndims, ap); + va_end(ap); + return err; +} + + From 58a8b88ef9e7e7640231259ced041b9df7c03081 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Mon, 28 Jan 2019 09:39:08 -0600 Subject: [PATCH 31/47] [build] add libexcit as build dependency Not used yet, but we will need it for iterators on tilings. --- configure.ac | 3 +++ src/Makefile.am | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index bf7aeec9..06260db0 100644 --- a/configure.ac +++ b/configure.ac @@ -52,6 +52,9 @@ AM_CONDITIONAL([ADD_BENCHMARKS],[test "x$benchmarks" = xtrue]) AC_CHECK_HEADERS(numa.h) AC_CHECK_LIB(numa, move_pages) +# excit iterators +PKG_CHECK_MODULES([EXCIT],[libexcit]) + # internal jemalloc ac_configure_args="$ac_configure_args \ '--with-jemalloc-prefix=jemk_aml_' \ diff --git a/src/Makefile.am b/src/Makefile.am index 8ce51dc2..6bbf5075 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -AM_CPPFLAGS = -I$(top_srcdir)/jemalloc/include +AM_CPPFLAGS = -I$(top_srcdir)/jemalloc/include @EXCIT_CFLAGS@ lib_LTLIBRARIES = libaml.la ARENA_JEMALLOC_CSOURCES = arena_jemalloc.c @@ -48,5 +48,5 @@ LIBCSOURCES = aml.c area.c arena.c \ LIBHSOURCES = aml.h aml-layout.h aml-layout-dense.h aml-layout-pad.h aml-layout-reshape.h aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) -libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml +libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml @EXCIT_LIBS@ include_HEADERS = $(LIBHSOURCES) From 03db551362e06140842ac7523a4ed564ca8111d8 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Mon, 28 Jan 2019 10:13:13 -0600 Subject: [PATCH 32/47] [fix] try to fix CI --- 
.gitlab-ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f7bc2814..03129427 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,11 +4,19 @@ stages: make:generic: stage: build script: + - git clone https://xgitlab.cels.anl.gov/argo/excit.git + - cd excit - ./autogen.sh - mkdir build - ./configure --prefix=`pwd`/build - make - make install + - cd .. + - ./autogen.sh + - mkdir build + - PKG_CONFIG_PATH=excit/build/lib/pkgconfig ./configure --prefix=`pwd`/build + - make + - make install - make check artifacts: when: on_failure From e8e71d04aeec0119eb7e5559690d7941c93966ba Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Mon, 28 Jan 2019 10:22:48 -0600 Subject: [PATCH 33/47] [ci] fix knl ci for excit --- .gitlab-ci.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 03129427..6e354846 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -30,9 +30,17 @@ make:knl: stage: build script: - source /opt/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64 + - git clone https://xgitlab.cels.anl.gov/argo/excit.git + - cd excit + - ./autogen.sh + - mkdir build + - ./configure --prefix=`pwd`/build + - make + - make install + - cd .. - ./autogen.sh - mkdir build - - CC=icc CFLAGS="-mkl -xhost" ./configure --prefix=`pwd`/build --enable-benchmarks + - CC=icc CFLAGS="-mkl -xhost" PKG_CONFIG_PATH=excit/build/lib/pkgconfig ./configure --prefix=`pwd`/build --enable-benchmarks - make -j64 - make install - make check From 93ba5f0c42c7a5b2faab3ee42efe4cee0e4d9298 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 28 Jan 2019 10:52:20 -0600 Subject: [PATCH 34/47] Added slice operation to dense layouts. 
--- src/aml-layout.h | 9 ++++ src/layout.c | 23 +++++++++ src/layout_dense.c | 112 +++++++++++++++++++++++++++++++++++++++- src/layout_pad.c | 4 ++ src/layout_reshape.c | 4 ++ tests/layout.c | 118 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 268 insertions(+), 2 deletions(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index fc068be6..5e4781ff 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -41,6 +41,11 @@ struct aml_layout_ops { size_t ndims, va_list dims); struct aml_layout * (*areshape)(const struct aml_layout_data *, size_t ndims, const size_t *dims); + struct aml_layout * (*slice)(const struct aml_layout_data *, + va_list dims); + struct aml_layout * (*aslice)(const struct aml_layout_data *, + const size_t *offsets, const size_t *dims, + const size_t *strides); }; struct aml_layout { @@ -60,5 +65,9 @@ struct aml_layout * aml_layout_areshape(const struct aml_layout *l, size_t ndims, const size_t *dims); struct aml_layout * aml_layout_reshape(const struct aml_layout *l, size_t ndims, ...); +struct aml_layout * aml_layout_slice(const struct aml_layout *l, ...); +struct aml_layout * aml_layout_aslice(const struct aml_layout *l, + const size_t *offsets, const size_t *dims, + const size_t *strides); #endif diff --git a/src/layout.c b/src/layout.c index 9a21da26..635b4759 100644 --- a/src/layout.c +++ b/src/layout.c @@ -87,3 +87,26 @@ struct aml_layout * aml_layout_reshape(const struct aml_layout *layout, va_end(ap); return ret; } + +struct aml_layout * aml_layout_slice(const struct aml_layout *layout, ...) 
+{ + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->slice != NULL); + va_list ap; + struct aml_layout *ret; + va_start(ap, layout); + ret = layout->ops->slice(layout->data, ap); + va_end(ap); + return ret; +} + +struct aml_layout * aml_layout_aslice(const struct aml_layout *layout, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->aslice != NULL); + return layout->ops->aslice(layout->data, offsets, dims, strides); +} diff --git a/src/layout_dense.c b/src/layout_dense.c index 7779ed88..9b8099fe 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -337,6 +337,54 @@ aml_layout_column_reshape(const struct aml_layout_data *data, size_t ndims, return aml_layout_column_areshape(data, ndims, n_dims); } +struct aml_layout * +aml_layout_column_aslice(const struct aml_layout_data *data, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + for (size_t i = 0; i < ndims; i++) + assert(offsets[i] + (dims[i] - 1) * strides[i] < d->dims[i]); + void * ptr = aml_layout_column_aderef(data, offsets); + size_t cpitch[ndims + 1]; + size_t new_strides[ndims]; + cpitch[ndims] = d->cpitch[ndims]; + for (size_t i = 0; i < ndims; i++) { + cpitch[i] = d->cpitch[i]; + new_strides[i] = strides[i] * d->stride[i]; + cpitch[ndims] -= cpitch[i] * offsets[i] * d->stride[i]; + } + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + ptr, ndims, dims, new_strides, cpitch); + layout->ops = &aml_layout_column_ops; + + return layout; +} + +struct aml_layout * +aml_layout_column_slice(const struct aml_layout_data *data, va_list args) 
+{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for (int i = 0; i < ndims; i++) + offsets[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + dims[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + strides[i] = va_arg(args, size_t); + return aml_layout_column_aslice(data, offsets, dims, strides); +} + struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_deref, aml_layout_column_aderef, @@ -347,7 +395,9 @@ struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_ndims, aml_layout_column_element_size, aml_layout_column_reshape, - aml_layout_column_areshape + aml_layout_column_areshape, + aml_layout_column_slice, + aml_layout_column_aslice }; /******************************************************************************* @@ -481,6 +531,62 @@ aml_layout_row_reshape(const struct aml_layout_data *data, size_t ndims, } +struct aml_layout * +aml_layout_row_aslice(const struct aml_layout_data *data, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + size_t n_offsets[ndims]; + size_t n_dims[ndims]; + size_t n_strides[ndims]; + for (size_t i = 0; i < ndims; i++) { + n_offsets[i] = offsets[ndims - i - 1]; + n_dims[i] = dims[ndims - i - 1]; + n_strides[i] = strides[ndims - i - 1]; + } + for (size_t i = 0; i < ndims; i++) + assert(n_offsets[i] + (n_dims[i] - 1) * n_strides[i] < + d->dims[i]); + void * ptr = aml_layout_column_aderef(data, n_offsets); + size_t cpitch[ndims + 1]; + cpitch[ndims] = d->cpitch[ndims]; + for (size_t i = 0; i < ndims; i++) { + cpitch[i] = d->cpitch[i]; + n_strides[i] *= d->stride[i]; + cpitch[ndims] -= cpitch[i] * n_offsets[i] * d->stride[i]; + } + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct 
aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_ROW_ORDER, + ptr, ndims, n_dims, n_strides, cpitch); + layout->ops = &aml_layout_row_ops; + + return layout; +} + +struct aml_layout * +aml_layout_row_slice(const struct aml_layout_data *data, va_list args) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for (int i = 0; i < ndims; i++) + offsets[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + dims[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + strides[i] = va_arg(args, size_t); + return aml_layout_row_aslice(data, offsets, dims, strides); +} + struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_deref, aml_layout_row_aderef, @@ -491,6 +597,8 @@ struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_ndims, aml_layout_row_element_size, aml_layout_row_reshape, - aml_layout_row_areshape + aml_layout_row_areshape, + aml_layout_row_slice, + aml_layout_row_aslice }; diff --git a/src/layout_pad.c b/src/layout_pad.c index 2d2fa5a9..7bbc29ac 100644 --- a/src/layout_pad.c +++ b/src/layout_pad.c @@ -229,6 +229,8 @@ struct aml_layout_ops aml_layout_pad_column_ops = { aml_layout_pad_ndims, aml_layout_pad_element_size, NULL, + NULL, + NULL, NULL }; @@ -314,6 +316,8 @@ struct aml_layout_ops aml_layout_pad_row_ops = { aml_layout_pad_ndims, aml_layout_pad_element_size, NULL, + NULL, + NULL, NULL }; diff --git a/src/layout_reshape.c b/src/layout_reshape.c index b5177fc4..d8019c27 100644 --- a/src/layout_reshape.c +++ b/src/layout_reshape.c @@ -247,6 +247,8 @@ struct aml_layout_ops aml_layout_reshape_column_ops = { aml_layout_reshape_ndims, aml_layout_reshape_element_size, NULL, + NULL, + NULL, NULL }; @@ -345,6 +347,8 @@ struct aml_layout_ops aml_layout_reshape_row_ops = { 
aml_layout_reshape_ndims, aml_layout_reshape_element_size, NULL, + NULL, + NULL, NULL }; diff --git a/tests/layout.c b/tests/layout.c index ed1edd27..83400259 100644 --- a/tests/layout.c +++ b/tests/layout.c @@ -1,6 +1,121 @@ #include #include +void test_slice_contiguous(void) +{ + int memory[6][5][4]; + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {1, 1, 1}; + + size_t offsets_col[3] = {2, 2, 3}; + size_t offsets_row[3] = {3, 2, 2}; + + size_t new_dims_col[3] = {2, 3, 3}; + size_t new_dims_row[3] = {3, 3, 2}; + + + int l = 0; + for(size_t i = 0; i < 6; i++) + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 4; k++, l++) + memory[i][j][k] = l; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + struct aml_layout *b = aml_layout_aslice(a, offsets_col, new_dims_col, stride); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(b)); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + { + assert( memory[i+3][j+2][k+2] == *(int *)aml_layout_deref(b, k, j, i)); + fprintf(stderr, "%d == %d\n", memory[i+3][j+2][k+2], *(int *)aml_layout_deref(b, k, j, i)); + } + free(a); + free(b); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + b = aml_layout_aslice(a, offsets_row, new_dims_row, stride); + assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(b)); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + { + assert( memory[i+3][j+2][k+2] == *(int *)aml_layout_deref(b, i, j, k)); + fprintf(stderr, "%d == %d\n", memory[i+3][j+2][k+2], *(int *)aml_layout_deref(b, i, j, k)); + } + free(a); + free(b); + +} + +void test_slice_strided(void) +{ + int memory[12][5][8]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = 
{2, 1, 2}; + + size_t pitch_col[3] = {8, 5, 12}; + size_t pitch_row[3] = {12, 5, 8}; + + size_t offsets_col[3] = {1, 2, 0}; + size_t offsets_row[3] = {0, 2, 1}; + + size_t new_dims_col[3] = {2, 3, 3}; + size_t new_dims_row[3] = {3, 3, 2}; + + size_t new_stride_col[3] = {2, 1, 1}; + size_t new_stride_row[3] = {1, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 12; i++) + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 8; k++, l++) + memory[i][j][k] = l; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, pitch_col); + struct aml_layout *b = aml_layout_aslice(a, offsets_col, new_dims_col, new_stride_col); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + assert( memory[stride[2] * (offsets_col[2] + new_stride_col[2] * i)][ + stride[1] * (offsets_col[1] + new_stride_col[1] * j)][ + stride[0] * (offsets_col[0] + new_stride_col[0] * k)] == *(int *)aml_layout_deref(b, k, j, i)); + + free(a); + free(b); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, pitch_row); + b = aml_layout_aslice(a, offsets_row, new_dims_row, new_stride_row); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + assert( memory[stride[2] * (offsets_col[2] + new_stride_col[2] * i)][ + stride[1] * (offsets_col[1] + new_stride_col[1] * j)][ + stride[0] * (offsets_col[0] + new_stride_col[0] * k)] == *(int *)aml_layout_deref(b, i, j, k)); + + free(a); + free(b); + +} void test_reshape_contiguous(void) { @@ -332,6 +447,9 @@ int main(int argc, char *argv[]) test_reshape_discontiguous(); test_reshape_strided(); + test_slice_contiguous(); + test_slice_strided(); + aml_finalize(); return 0; } From efbc93b26fbb41d899ebaad54ea280ae1fc94b49 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Mon, 28 Jan 2019 11:20:17 -0600 Subject: [PATCH 35/47] [fix] various 
typos on new tilings Mostly obvious stuff. --- src/Makefile.am | 13 ++++++++++++- src/aml-tiling-resize.h | 4 ++-- src/aml-tiling.h | 4 ++-- src/aml.h | 2 ++ src/tiling_nd.c | 4 ++-- src/tiling_nd_resize.c | 6 +++--- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 6bbf5075..0041f1e8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,6 +19,9 @@ TILING_CSOURCES = tiling.c \ tiling_1d.c \ tiling_2d.c +TILING_ND_CSOURCES = tiling_nd.c \ + tiling_nd_resize.c + BINDING_CSOURCES = binding.c \ binding_single.c \ binding_interleave.c @@ -39,13 +42,21 @@ LIBCSOURCES = aml.c area.c arena.c \ $(AREA_LINUX_CSOURCES) \ $(AREA_POSIX_CSOURCES) \ $(TILING_CSOURCES) \ + $(TILING_ND_CSOURCES) \ $(BINDING_CSOURCES) \ $(DMA_CSOURCES) \ $(SCRATCH_CSOURCES) \ $(LAYOUT_CSOURCES) \ copy.c -LIBHSOURCES = aml.h aml-layout.h aml-layout-dense.h aml-layout-pad.h aml-layout-reshape.h aml-copy.h +LIBHSOURCES = aml.h \ + aml-layout.h \ + aml-layout-dense.h \ + aml-layout-pad.h \ + aml-layout-reshape.h \ + aml-tiling.h \ + aml-tiling-resize.h \ + aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml @EXCIT_LIBS@ diff --git a/src/aml-tiling-resize.h b/src/aml-tiling-resize.h index 0e9bc227..99a5a88e 100644 --- a/src/aml-tiling-resize.h +++ b/src/aml-tiling-resize.h @@ -9,10 +9,10 @@ struct aml_tiling_nd_data_resize { size_t *tile_dims; size_t *dims; size_t *border_tile_dims; -} +}; #define AML_TILING_RESIZE_ALLOCSIZE(ndims) (sizeof(struct aml_tiling_nd) +\ - sizeof(struct aml_tiling_nd_resize) +\ + sizeof(struct aml_tiling_nd_data_resize) +\ (ndims * 3) * sizeof(size_t)) int aml_tiling_nd_resize_struct_init(struct aml_tiling_nd *t, size_t ndims, diff --git a/src/aml-tiling.h b/src/aml-tiling.h index c559c6af..7bf2293d 100644 --- a/src/aml-tiling.h +++ b/src/aml-tiling.h @@ -9,8 +9,8 @@ struct aml_tiling_nd_data; #define AML_TYPE_TILING_ORDER (1 << 0) #define 
AML_TYPE_TILING_MAX (1 << 1) -#define AML_TYPE_LILING_ROW_ORDER 1 -#define AML_TYPE_LILING_COLUMN_ORDER 0 +#define AML_TYPE_TILING_ROW_ORDER 1 +#define AML_TYPE_TILING_COLUMN_ORDER 0 struct aml_tiling_nd_ops { struct aml_layout* (*index)(const struct aml_tiling_nd_data *, diff --git a/src/aml.h b/src/aml.h index 87ea0260..ffce9502 100644 --- a/src/aml.h +++ b/src/aml.h @@ -23,6 +23,8 @@ #include "aml-layout-dense.h" #include "aml-layout-pad.h" #include "aml-layout-reshape.h" +#include "aml-tiling.h" +#include "aml-tiling-resize.h" #include "aml-copy.h" /******************************************************************************* * Forward Declarations: diff --git a/src/tiling_nd.c b/src/tiling_nd.c index b87436d0..b2051884 100644 --- a/src/tiling_nd.c +++ b/src/tiling_nd.c @@ -33,7 +33,7 @@ int aml_tiling_nd_tile_dims(const struct aml_tiling_nd *t, ...) va_list ap; int ret; va_start(ap, t); - ret = t->ops->tile_dims(t->data, ap) + ret = t->ops->tile_dims(t->data, ap); va_end(ap); return ret; } @@ -52,7 +52,7 @@ int aml_tiling_nd_dims(const struct aml_tiling_nd *t, ...) 
va_list ap; int ret; va_start(ap, t); - ret = t->ops->dims(t->data, ap) + ret = t->ops->dims(t->data, ap); va_end(ap); return ret; } diff --git a/src/tiling_nd_resize.c b/src/tiling_nd_resize.c index 548f5d18..ae941cee 100644 --- a/src/tiling_nd_resize.c +++ b/src/tiling_nd_resize.c @@ -27,7 +27,7 @@ int aml_tiling_nd_resize_ainit(struct aml_tiling_nd *t, uint64_t tags, assert(t != NULL); assert(t->data != NULL); struct aml_tiling_nd_data_resize *data = - (struct aml_layout_data_native *)t->data; + (struct aml_tiling_nd_data_resize *)t->data; assert(data->ndims == ndims); assert(data->tile_dims); assert(data->dims); @@ -49,7 +49,7 @@ int aml_tiling_nd_resize_ainit(struct aml_tiling_nd *t, uint64_t tags, } size_t target_dims[ndims]; aml_layout_adims_column(l, target_dims); - for (size_t i = 0; i < ndims; i++) + for (size_t i = 0; i < ndims; i++) { data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; data->dims[i] = target_dims[i] / data->tile_dims[i]; if (data->border_tile_dims[i] == 0) @@ -66,7 +66,7 @@ int aml_tiling_nd_resize_vinit(struct aml_tiling_nd *t, uint64_t tags, { size_t tile_dims[ndims]; for(size_t i = 0; i < ndims; i++) - tile_dims[i] = va_arg(ap, size_t); + tile_dims[i] = va_arg(data, size_t); return aml_tiling_nd_resize_ainit(t, tags, l, ndims, tile_dims); } From 0134a289eb1dc00e3f0d6081a18451af97d9c9b4 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 28 Jan 2019 15:18:19 -0600 Subject: [PATCH 36/47] Added native (column) version of some functions. 
--- src/aml-layout.h | 6 ++ src/layout_dense.c | 36 ++++++- src/layout_pad.c | 14 +-- src/layout_reshape.c | 38 +++---- src/tiling_nd_resize.c | 239 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 295 insertions(+), 38 deletions(-) diff --git a/src/aml-layout.h b/src/aml-layout.h index 5e4781ff..21a647c3 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -31,6 +31,8 @@ struct aml_layout_data; struct aml_layout_ops { void *(*deref)(const struct aml_layout_data *, va_list coords); void *(*aderef)(const struct aml_layout_data *, const size_t *coords); + void *(*aderef_column)(const struct aml_layout_data *, + const size_t *coords); int (*order)(const struct aml_layout_data *); int (*dims)(const struct aml_layout_data *, va_list dim_ptrs); int (*adims)(const struct aml_layout_data *, size_t *dims); @@ -46,6 +48,10 @@ struct aml_layout_ops { struct aml_layout * (*aslice)(const struct aml_layout_data *, const size_t *offsets, const size_t *dims, const size_t *strides); + struct aml_layout * (*aslice_column)(const struct aml_layout_data *, + const size_t *offsets, + const size_t *dims, + const size_t *strides); }; struct aml_layout { diff --git a/src/layout_dense.c b/src/layout_dense.c index 9b8099fe..2c7ea9ee 100644 --- a/src/layout_dense.c +++ b/src/layout_dense.c @@ -388,6 +388,7 @@ aml_layout_column_slice(const struct aml_layout_data *data, va_list args) struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_deref, aml_layout_column_aderef, + aml_layout_column_aderef, aml_layout_column_order, aml_layout_column_dims, aml_layout_column_adims, @@ -397,6 +398,7 @@ struct aml_layout_ops aml_layout_column_ops = { aml_layout_column_reshape, aml_layout_column_areshape, aml_layout_column_slice, + aml_layout_column_aslice, aml_layout_column_aslice }; @@ -587,9 +589,40 @@ aml_layout_row_slice(const struct aml_layout_data *data, va_list args) return aml_layout_row_aslice(data, offsets, dims, strides); } +struct aml_layout * 
+aml_layout_row_aslice_column(const struct aml_layout_data *data, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + for (size_t i = 0; i < ndims; i++) + assert(offsets[i] + (dims[i] - 1) * strides[i] < d->dims[i]); + void * ptr = aml_layout_column_aderef(data, offsets); + size_t cpitch[ndims + 1]; + size_t new_strides[ndims]; + cpitch[ndims] = d->cpitch[ndims]; + for (size_t i = 0; i < ndims; i++) { + cpitch[i] = d->cpitch[i]; + new_strides[i] = strides[i] * d->stride[i]; + cpitch[ndims] -= cpitch[i] * offsets[i] * d->stride[i]; + } + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_ROW_ORDER, + ptr, ndims, dims, new_strides, cpitch); + layout->ops = &aml_layout_row_ops; + + return layout; +} + struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_deref, aml_layout_row_aderef, + aml_layout_column_aderef, aml_layout_row_order, aml_layout_row_dims, aml_layout_row_adims, @@ -599,6 +632,7 @@ struct aml_layout_ops aml_layout_row_ops = { aml_layout_row_reshape, aml_layout_row_areshape, aml_layout_row_slice, - aml_layout_row_aslice + aml_layout_row_aslice, + aml_layout_row_aslice_column }; diff --git a/src/layout_pad.c b/src/layout_pad.c index 7bbc29ac..3e1564bd 100644 --- a/src/layout_pad.c +++ b/src/layout_pad.c @@ -151,15 +151,7 @@ void *aml_layout_pad_column_aderef(const struct aml_layout_data *data, if(coords[i] >= d->target_dims[i]) return d->neutral; } - int type = aml_layout_order(d->target); - if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) - return aml_layout_aderef(d->target, coords); - else { - size_t target_coords[ndims]; - for (int i = 0; i < ndims; i++) - target_coords[i] = coords[ndims - i - 1]; - return 
aml_layout_aderef(d->target, coords); - } + return d->target->ops->aderef_column(d->target->data, coords); } void *aml_layout_pad_column_deref(const struct aml_layout_data *data, @@ -222,6 +214,7 @@ size_t aml_layout_pad_element_size(const struct aml_layout_data *data) struct aml_layout_ops aml_layout_pad_column_ops = { aml_layout_pad_column_deref, aml_layout_pad_column_aderef, + aml_layout_pad_column_aderef, aml_layout_pad_column_order, aml_layout_pad_column_dims, aml_layout_pad_column_adims, @@ -231,6 +224,7 @@ struct aml_layout_ops aml_layout_pad_column_ops = { NULL, NULL, NULL, + NULL, NULL }; @@ -309,6 +303,7 @@ int aml_layout_pad_row_adims(const struct aml_layout_data *data, size_t *dims) struct aml_layout_ops aml_layout_pad_row_ops = { aml_layout_pad_row_deref, aml_layout_pad_row_aderef, + aml_layout_pad_column_aderef, aml_layout_pad_row_order, aml_layout_pad_row_dims, aml_layout_pad_row_adims, @@ -318,6 +313,7 @@ struct aml_layout_ops aml_layout_pad_row_ops = { NULL, NULL, NULL, + NULL, NULL }; diff --git a/src/layout_reshape.c b/src/layout_reshape.c index d8019c27..9cef96d5 100644 --- a/src/layout_reshape.c +++ b/src/layout_reshape.c @@ -165,20 +165,11 @@ void *aml_layout_reshape_column_aderef(const struct aml_layout_data *data, for (int i = 0; i < ndims; i++) offset += coords[i] * d->coffsets[i]; - int type = aml_layout_order(d->target); - if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { - for (int i = 0; i < target_ndims; i++) { - target_coords[i] = offset % d->target_dims[i]; - offset /= d->target_dims[i]; - } - } else { - for (int i = 0; i < target_ndims; i++) { - target_coords[target_ndims - i - 1] = - offset % d->target_dims[i]; - offset /= d->target_dims[i]; - } + for (int i = 0; i < target_ndims; i++) { + target_coords[i] = offset % d->target_dims[i]; + offset /= d->target_dims[i]; } - return aml_layout_aderef(d->target, target_coords); + return d->target->ops->aderef_column(d->target->data, target_coords); } void *aml_layout_reshape_column_deref(const 
struct aml_layout_data *data, @@ -240,6 +231,7 @@ size_t aml_layout_reshape_element_size(const struct aml_layout_data *data) struct aml_layout_ops aml_layout_reshape_column_ops = { aml_layout_reshape_column_deref, aml_layout_reshape_column_aderef, + aml_layout_reshape_column_aderef, aml_layout_reshape_column_order, aml_layout_reshape_column_dims, aml_layout_reshape_column_adims, @@ -249,6 +241,7 @@ struct aml_layout_ops aml_layout_reshape_column_ops = { NULL, NULL, NULL, + NULL, NULL }; @@ -276,20 +269,11 @@ void *aml_layout_reshape_row_aderef(const struct aml_layout_data *data, for (int i = 0; i < ndims; i++) offset += coords[ndims - i - 1] * d->coffsets[i]; - int type = aml_layout_order(d->target); - if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { - for (int i = 0; i < target_ndims; i++) { - target_coords[i] = offset % d->target_dims[i]; - offset /= d->target_dims[i]; - } - } else { - for (int i = 0; i < target_ndims; i++) { - target_coords[target_ndims - i - 1] = - offset % d->target_dims[i]; - offset /= d->target_dims[i]; - } + for (int i = 0; i < target_ndims; i++) { + target_coords[i] = offset % d->target_dims[i]; + offset /= d->target_dims[i]; } - return aml_layout_aderef(d->target, target_coords); + return d->target->ops->aderef_column(d->target->data, target_coords); } void *aml_layout_reshape_row_deref(const struct aml_layout_data *data, @@ -340,6 +324,7 @@ int aml_layout_reshape_row_adims(const struct aml_layout_data *data, struct aml_layout_ops aml_layout_reshape_row_ops = { aml_layout_reshape_row_deref, aml_layout_reshape_row_aderef, + aml_layout_reshape_column_aderef, aml_layout_reshape_row_order, aml_layout_reshape_row_dims, aml_layout_reshape_row_adims, @@ -349,6 +334,7 @@ struct aml_layout_ops aml_layout_reshape_row_ops = { NULL, NULL, NULL, + NULL, NULL }; diff --git a/src/tiling_nd_resize.c b/src/tiling_nd_resize.c index ae941cee..0352a737 100644 --- a/src/tiling_nd_resize.c +++ b/src/tiling_nd_resize.c @@ -48,12 +48,12 @@ int 
aml_tiling_nd_resize_ainit(struct aml_tiling_nd *t, uint64_t tags, data->tile_dims[i] = tile_dims[i]; } size_t target_dims[ndims]; - aml_layout_adims_column(l, target_dims); + l->ops->adims_column(l->data, target_dims); for (size_t i = 0; i < ndims; i++) { data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; data->dims[i] = target_dims[i] / data->tile_dims[i]; if (data->border_tile_dims[i] == 0) - data->border_tile_dims[i] = target_dims[i]; + data->border_tile_dims[i] = data->tile_dims[i]; else data->dims[i] += 1; } @@ -118,4 +118,239 @@ int aml_tiling_nd_resize_create(struct aml_tiling_nd **t, uint64_t tags, return err; } +/*----------------------------------------------------------------------------*/ +struct aml_layout* +aml_tiling_nd_resize_column_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for(size_t i = 0; i < ndims; i++) + assert(coords[i] < d->dims[i]); + for(size_t i = 0; i < ndims; i++) { + offsets[i] = coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (coords[i] == d->dims[i] - 1 ? 
+ d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_resize_column_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_resize_column_aindex(l, n_coords); +} + +int +aml_tiling_nd_resize_column_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_COLUMN_ORDER; +} + +int +aml_tiling_nd_resize_column_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[i]; + } + return 0; +} + +int +aml_tiling_nd_resize_column_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + memcpy((void*)tile_dims, (void*)d->tile_dims, sizeof(size_t)*d->ndims); + return 0; +} + +int +aml_tiling_nd_resize_column_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int +aml_tiling_nd_resize_column_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t 
+aml_tiling_nd_resize_column_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_resize_column_ops = { + aml_tiling_nd_resize_column_index, + aml_tiling_nd_resize_column_aindex, + aml_tiling_nd_resize_column_order, + aml_tiling_nd_resize_column_tile_dims, + aml_tiling_nd_resize_column_tile_adims, + aml_tiling_nd_resize_column_dims, + aml_tiling_nd_resize_column_adims, + aml_tiling_nd_resize_column_ndims +}; + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_resize_row_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + + for(size_t i = 0; i < ndims; i++) + assert(coords[ndims - i - 1] < d->dims[i]); + for(size_t i = 0; i < ndims; i++) { + offsets[i] = coords[ndims - i - 1] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (coords[ndims - i - 1] == d->dims[i] - 1 ? 
+ d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_resize_row_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_resize_row_aindex(l, n_coords); +} + +int +aml_tiling_nd_resize_row_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_ROW_ORDER; +} + +int +aml_tiling_nd_resize_row_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_resize_row_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + tile_dims[i] = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_resize_row_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_resize_row_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 
0; +} + +size_t +aml_tiling_nd_resize_row_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_resize_row_ops = { + aml_tiling_nd_resize_row_index, + aml_tiling_nd_resize_row_aindex, + aml_tiling_nd_resize_row_order, + aml_tiling_nd_resize_row_tile_dims, + aml_tiling_nd_resize_row_tile_adims, + aml_tiling_nd_resize_row_dims, + aml_tiling_nd_resize_row_adims, + aml_tiling_nd_resize_row_ndims +}; From a4facf51fb693dda95720b8a6c6e9adc05b09786 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 28 Jan 2019 15:18:59 -0600 Subject: [PATCH 37/47] Working version of resizing tiling with tests. --- src/aml-tiling-resize.h | 2 +- tests/Makefile.am | 2 +- tests/tiling_nd.c | 215 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 2 deletions(-) create mode 100644 tests/tiling_nd.c diff --git a/src/aml-tiling-resize.h b/src/aml-tiling-resize.h index 99a5a88e..eb7fee8e 100644 --- a/src/aml-tiling-resize.h +++ b/src/aml-tiling-resize.h @@ -4,7 +4,7 @@ #include struct aml_tiling_nd_data_resize { - struct aml_layout *l; + const struct aml_layout *l; size_t ndims; size_t *tile_dims; size_t *dims; diff --git a/tests/Makefile.am b/tests/Makefile.am index 16d34b70..2219b7c9 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -40,7 +40,7 @@ UNIT_TESTS = $(ARENA_JEMALLOC_TESTS) \ $(AREA_POSIX_TESTS) \ $(DMA_LINUX_TESTS) \ $(SCRATCH_TESTS) \ - layout copy + layout copy tiling_nd # all tests TST_PROGS = $(UNIT_TESTS) diff --git a/tests/tiling_nd.c b/tests/tiling_nd.c new file mode 100644 index 00000000..f301a032 --- /dev/null +++ b/tests/tiling_nd.c @@ -0,0 +1,215 @@ +#include +#include + +void test_tiling_even(void) +{ + int memory[9][10][8]; + int memoryres[9][10][8]; + size_t dims_col[3] = {8, 10, 9}; + size_t dims_row[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t 
dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0.0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col, + stride, dims_col); + + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_col, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int))); + + free(a); + free(t); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 
3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int))); + + free(a); + free(t); + +} + +void test_tiling_uneven(void) +{ + + int memory[8][10][7]; + int memoryres[9][10][8]; + size_t dims_col[3] = {7, 10, 8}; + size_t dims_row[3] = {8, 10, 7}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 8; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 7; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0.0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col, + stride, dims_col); + + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + 
assert(memcmp(dims, expected_dims_col, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int))); + + free(a); + free(t); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < 8; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 7; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int))); + + free(a); + free(t); + +} + +int main(int argc, char *argv[]) +{ + /* library initialization */ + aml_init(&argc, &argv); + + test_tiling_even(); + test_tiling_uneven(); + + return 0; +} + From 4863232a04c31af24b64ea1bacf52ad0f834af5e Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Mon, 28 Jan 2019 22:48:51 -0600 Subject: [PATCH 38/47] [feature] add layout-aware dma Surprisingly, there's no need to modify the top dma type to add this feature. 
It's missing operator support, to allow users to specify one of the functions in copy.c as the copy operator. --- src/Makefile.am | 3 +- src/aml-dma-layout.h | 39 ++++++++++ src/aml.h | 1 + src/dma_layout.c | 170 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 src/aml-dma-layout.h create mode 100644 src/dma_layout.c diff --git a/src/Makefile.am b/src/Makefile.am index 0041f1e8..7a2fa3e0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -28,7 +28,8 @@ BINDING_CSOURCES = binding.c \ DMA_CSOURCES = dma.c \ dma_linux_par.c \ - dma_linux_seq.c + dma_linux_seq.c \ + dma_layout.c SCRATCH_CSOURCES = scratch.c \ scratch_seq.c \ diff --git a/src/aml-dma-layout.h b/src/aml-dma-layout.h new file mode 100644 index 00000000..6148a9c1 --- /dev/null +++ b/src/aml-dma-layout.h @@ -0,0 +1,39 @@ +#ifndef AML_DMA_LAYOUT_H +#define AML_DMA_LAYOUT_H 1 + +/******************************************************************************* + * Layout aware DMA + * DMA using layouts as source and destination. 
+ ******************************************************************************/ + +extern struct aml_dma_ops aml_dma_ops_layout; + +struct aml_dma_request_layout { + int type; + struct aml_layout *dest; + struct aml_layout *src; +}; + +struct aml_dma_layout { + struct aml_vector requests; + pthread_mutex_t lock; + int (*do_work)(struct aml_layout *dest, struct aml_layout *src); +}; + +#define AML_DMA_LAYOUT_DECL(name) \ + struct aml_dma_layout __ ##name## _inner_data; \ + struct aml_dma name = { \ + &aml_dma_ops_layout, \ + (struct aml_dma_data *)&__ ## name ## _inner_data, \ + }; + +#define AML_DMA_LAYOUT_ALLOCSIZE \ + (sizeof(struct aml_dma_layout) + \ + sizeof(struct aml_dma)) + +int aml_dma_layout_create(struct aml_dma **dma, ...); +int aml_dma_layout_init(struct aml_dma *dma, ...); +int aml_dma_layout_vinit(struct aml_dma *dma, va_list args); +int aml_dma_layout_destroy(struct aml_dma *dma); + +#endif diff --git a/src/aml.h b/src/aml.h index ffce9502..98e6c5c2 100644 --- a/src/aml.h +++ b/src/aml.h @@ -1304,6 +1304,7 @@ int aml_dma_wait(struct aml_dma *dma, struct aml_dma_request *req); */ int aml_dma_cancel(struct aml_dma *dma, struct aml_dma_request *req); +#include "aml-dma-layout.h" /******************************************************************************* * Linux Sequential DMA API: * DMA logic implemented based on general linux API, with the caller thread diff --git a/src/dma_layout.c b/src/dma_layout.c new file mode 100644 index 00000000..2e376e17 --- /dev/null +++ b/src/dma_layout.c @@ -0,0 +1,170 @@ +#include +#include +#include +#include + +/******************************************************************************* + * Requests: + ******************************************************************************/ + +int aml_dma_request_layout_init(struct aml_dma_request_layout *req, + struct aml_layout *dl, + struct aml_layout *sl) +{ + assert(req != NULL); + req->type = AML_DMA_REQUEST_TYPE_COPY; + /* figure out pointers */ + req->dest = 
dl; + req->src = sl; + return 0; +} + +int aml_dma_request_layout_destroy(struct aml_dma_request_layout *r) +{ + assert(r != NULL); + return 0; +} + +/******************************************************************************* + * Internal functions + ******************************************************************************/ +int aml_dma_layout_do_work(struct aml_dma_layout *dma, + struct aml_dma_request_layout *req) +{ + assert(dma != NULL); + assert(req != NULL); + //memcpy(req->dest, req->src, req->size); + return 0; +} + + +/******************************************************************************* + * Public API + ******************************************************************************/ + +int aml_dma_layout_create_request(struct aml_dma_data *d, + struct aml_dma_request **r, + int type, va_list ap) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_dma_layout *dma = + (struct aml_dma_layout *)d; + + struct aml_dma_request_layout *req; + + pthread_mutex_lock(&dma->lock); + req = aml_vector_add(&dma->requests); + + /* we don't support move at this time */ + assert(type == AML_DMA_REQUEST_TYPE_COPY); + struct aml_layout *dl, *sl; + dl = va_arg(ap, struct aml_layout *); + sl = va_arg(ap, struct aml_layout *); + aml_dma_request_layout_init(req, dl, sl); + + pthread_mutex_unlock(&dma->lock); + *r = (struct aml_dma_request *)req; + return 0; +} + +int aml_dma_layout_destroy_request(struct aml_dma_data *d, + struct aml_dma_request *r) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_dma_layout *dma = + (struct aml_dma_layout *)d; + + struct aml_dma_request_layout *req = + (struct aml_dma_request_layout *)r; + + assert(req->type == AML_DMA_REQUEST_TYPE_COPY); + aml_dma_request_layout_destroy(req); + + /* enough to remove from request vector */ + pthread_mutex_lock(&dma->lock); + aml_vector_remove(&dma->requests, req); + pthread_mutex_unlock(&dma->lock); + return 0; +} + +int aml_dma_layout_wait_request(struct aml_dma_data *d, + 
struct aml_dma_request *r) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_dma_layout *dma = (struct aml_dma_layout *)d; + struct aml_dma_request_layout *req = + (struct aml_dma_request_layout *)r; + + /* execute */ + assert(req->type == AML_DMA_REQUEST_TYPE_COPY); + dma->do_work(dma, req); + + /* destroy a completed request */ + aml_dma_layout_destroy_request(d, r); + return 0; +} + +struct aml_dma_ops aml_dma_ops_layout = { + aml_dma_layout_create_request, + aml_dma_layout_destroy_request, + aml_dma_layout_wait_request, +}; + +/******************************************************************************* + * Init functions: + ******************************************************************************/ + +int aml_dma_layout_create(struct aml_dma **d, ...) +{ + va_list ap; + struct aml_dma *ret = NULL; + intptr_t baseptr, dataptr; + va_start(ap, d); + + /* alloc */ + baseptr = (intptr_t) calloc(1, AML_DMA_LAYOUT_ALLOCSIZE); + dataptr = baseptr + sizeof(struct aml_dma); + + ret = (struct aml_dma *)baseptr; + ret->data = (struct aml_dma_data *)dataptr; + + aml_dma_layout_vinit(ret, ap); + + va_end(ap); + *d = ret; + return 0; +} +int aml_dma_layout_vinit(struct aml_dma *d, va_list ap) +{ + d->ops = &aml_dma_ops_layout; + struct aml_dma_layout *dma = (struct aml_dma_layout *)d->data; + + /* request vector */ + size_t nbreqs = va_arg(ap, size_t); + aml_vector_init(&dma->requests, nbreqs, + sizeof(struct aml_dma_request_layout), + offsetof(struct aml_dma_request_layout, type), + AML_DMA_REQUEST_TYPE_INVALID); + pthread_mutex_init(&dma->lock, NULL); + return 0; +} +int aml_dma_layout_init(struct aml_dma *d, ...) 
+{ + int err; + va_list ap; + va_start(ap, d); + err = aml_dma_layout_vinit(d, ap); + va_end(ap); + return err; +} + +int aml_dma_layout_destroy(struct aml_dma *d) +{ + struct aml_dma_layout *dma = (struct aml_dma_layout *)d->data; + aml_vector_destroy(&dma->requests); + pthread_mutex_destroy(&dma->lock); + return 0; +} From 8aefa5e2d508a154ca9cfa20a51d072f7d187128 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Tue, 29 Jan 2019 10:49:22 -0600 Subject: [PATCH 39/47] [feature] add dma operator to the layout dma Now the user can specify which of the aml_copy operators to use for dma. --- src/aml-dma-layout.h | 4 +++- src/dma_layout.c | 23 +++++++---------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/aml-dma-layout.h b/src/aml-dma-layout.h index 6148a9c1..f2a57ca8 100644 --- a/src/aml-dma-layout.h +++ b/src/aml-dma-layout.h @@ -12,12 +12,14 @@ struct aml_dma_request_layout { int type; struct aml_layout *dest; struct aml_layout *src; + void *arg; }; +typedef int (*aml_dma_operator)(struct aml_layout *, struct aml_layout *, void*); struct aml_dma_layout { struct aml_vector requests; pthread_mutex_t lock; - int (*do_work)(struct aml_layout *dest, struct aml_layout *src); + aml_dma_operator do_work; }; #define AML_DMA_LAYOUT_DECL(name) \ diff --git a/src/dma_layout.c b/src/dma_layout.c index 2e376e17..f223db8e 100644 --- a/src/dma_layout.c +++ b/src/dma_layout.c @@ -9,13 +9,14 @@ int aml_dma_request_layout_init(struct aml_dma_request_layout *req, struct aml_layout *dl, - struct aml_layout *sl) + struct aml_layout *sl, void *arg) { assert(req != NULL); req->type = AML_DMA_REQUEST_TYPE_COPY; /* figure out pointers */ req->dest = dl; req->src = sl; + req->arg = arg; return 0; } @@ -25,19 +26,6 @@ int aml_dma_request_layout_destroy(struct aml_dma_request_layout *r) return 0; } -/******************************************************************************* - * Internal functions - 
******************************************************************************/ -int aml_dma_layout_do_work(struct aml_dma_layout *dma, - struct aml_dma_request_layout *req) -{ - assert(dma != NULL); - assert(req != NULL); - //memcpy(req->dest, req->src, req->size); - return 0; -} - - /******************************************************************************* * Public API ******************************************************************************/ @@ -59,9 +47,11 @@ int aml_dma_layout_create_request(struct aml_dma_data *d, /* we don't support move at this time */ assert(type == AML_DMA_REQUEST_TYPE_COPY); struct aml_layout *dl, *sl; + void *arg; dl = va_arg(ap, struct aml_layout *); sl = va_arg(ap, struct aml_layout *); - aml_dma_request_layout_init(req, dl, sl); + arg = va_arg(ap, void *); + aml_dma_request_layout_init(req, dl, sl, arg); pthread_mutex_unlock(&dma->lock); *r = (struct aml_dma_request *)req; @@ -100,7 +90,7 @@ int aml_dma_layout_wait_request(struct aml_dma_data *d, /* execute */ assert(req->type == AML_DMA_REQUEST_TYPE_COPY); - dma->do_work(dma, req); + dma->do_work(req->dest, req->src, req->arg); /* destroy a completed request */ aml_dma_layout_destroy_request(d, r); @@ -144,6 +134,7 @@ int aml_dma_layout_vinit(struct aml_dma *d, va_list ap) /* request vector */ size_t nbreqs = va_arg(ap, size_t); + dma->do_work = va_arg(ap, aml_dma_operator); aml_vector_init(&dma->requests, nbreqs, sizeof(struct aml_dma_request_layout), offsetof(struct aml_dma_request_layout, type), AML_DMA_REQUEST_TYPE_INVALID); From 323fe9ea67597adf86b291ccb417a23cd42c6c9d Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Tue, 29 Jan 2019 13:27:39 -0600 Subject: [PATCH 40/47] [fix] add tests for the new dma, make it work Add tests based on the copy/transpose tests. This includes moving the additional request argument into the dma, as for most purposes that argument will be stable across dma (when moving a single tiling). 
--- src/aml-dma-layout.h | 2 +- src/dma_layout.c | 9 ++-- tests/Makefile.am | 2 +- tests/dma_layout.c | 111 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+), 7 deletions(-) create mode 100644 tests/dma_layout.c diff --git a/src/aml-dma-layout.h b/src/aml-dma-layout.h index f2a57ca8..4e3e68a3 100644 --- a/src/aml-dma-layout.h +++ b/src/aml-dma-layout.h @@ -12,7 +12,6 @@ struct aml_dma_request_layout { int type; struct aml_layout *dest; struct aml_layout *src; - void *arg; }; typedef int (*aml_dma_operator)(struct aml_layout *, struct aml_layout *, void*); @@ -20,6 +19,7 @@ struct aml_dma_layout { struct aml_vector requests; pthread_mutex_t lock; aml_dma_operator do_work; + void *work_arg; }; #define AML_DMA_LAYOUT_DECL(name) \ diff --git a/src/dma_layout.c b/src/dma_layout.c index f223db8e..609cab96 100644 --- a/src/dma_layout.c +++ b/src/dma_layout.c @@ -9,14 +9,13 @@ int aml_dma_request_layout_init(struct aml_dma_request_layout *req, struct aml_layout *dl, - struct aml_layout *sl, void *arg) + struct aml_layout *sl) { assert(req != NULL); req->type = AML_DMA_REQUEST_TYPE_COPY; /* figure out pointers */ req->dest = dl; req->src = sl; - req->arg = arg; return 0; } @@ -50,8 +49,7 @@ int aml_dma_layout_create_request(struct aml_dma_data *d, void *arg; dl = va_arg(ap, struct aml_layout *); sl = va_arg(ap, struct aml_layout *); - arg = va_arg(ap, void *); - aml_dma_request_layout_init(req, dl, sl, arg); + aml_dma_request_layout_init(req, dl, sl); pthread_mutex_unlock(&dma->lock); *r = (struct aml_dma_request *)req; @@ -90,7 +88,7 @@ int aml_dma_layout_wait_request(struct aml_dma_data *d, /* execute */ assert(req->type == AML_DMA_REQUEST_TYPE_COPY); - dma->do_work(req->dest, req->src, req->arg); + dma->do_work(req->dest, req->src, dma->work_arg); /* destroy a completed request */ aml_dma_layout_destroy_request(d, r); @@ -135,6 +133,7 @@ int aml_dma_layout_vinit(struct aml_dma *d, va_list ap) /* request vector */ size_t nbreqs = va_arg(ap, 
size_t); dma->do_work = va_arg(ap, aml_dma_operator); + dma->work_arg = va_arg(ap, void *); aml_vector_init(&dma->requests, nbreqs, sizeof(struct aml_dma_request_layout), offsetof(struct aml_dma_request_layout, type), diff --git a/tests/Makefile.am b/tests/Makefile.am index 2219b7c9..35ccff90 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -40,7 +40,7 @@ UNIT_TESTS = $(ARENA_JEMALLOC_TESTS) \ $(AREA_POSIX_TESTS) \ $(DMA_LINUX_TESTS) \ $(SCRATCH_TESTS) \ - layout copy tiling_nd + layout copy tiling_nd dma_layout # all tests TST_PROGS = $(UNIT_TESTS) diff --git a/tests/dma_layout.c b/tests/dma_layout.c new file mode 100644 index 00000000..3469b2e3 --- /dev/null +++ b/tests/dma_layout.c @@ -0,0 +1,111 @@ +#include +#include + +void test_dma_copy_generic() +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 1, 1, 1}; + size_t c_dst_pitch[3] = { 5, 3, 2 }; + size_t dst_stride[3] = { 1, 1, 1}; + + double src[4][6][10]; + double dst[2][3][5]; + + double ref_dst[2][3][5]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + AML_DMA_LAYOUT_DECL(dma); + + /* library initialization */ + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_dma_layout_init(&dma, 1, aml_copy_layout_generic, NULL); + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + } + + aml_dma_copy(&dma, &dst_layout, &src_layout); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + 
assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_dma_layout_destroy(&dma); +} + +void test_dma_transpose_generic(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[4] = { 3, 2, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + + double ref_dst[5][4][2][3]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst_layout, 4); + AML_DMA_LAYOUT_DECL(dma); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_dma_layout_init(&dma, 1, aml_copy_layout_transpose_generic, NULL); + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + } + aml_dma_copy(&dma, &dst_layout, &src_layout); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + aml_dma_layout_destroy(&dma); +} + +int main(int argc, char *argv[]) +{ + aml_init(&argc, &argv); + test_dma_copy_generic(); + aml_finalize(); + return 0; +} From 87e0c55fe84d43fb94667d4765edfce346acb3de Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Tue, 29 Jan 2019 14:59:53 -0600 Subject: [PATCH 41/47] Added padding tiling and corrected tests. 
--- src/Makefile.am | 4 +- src/aml-tiling-pad.h | 44 +++++ src/aml.h | 1 + src/tiling_nd_pad.c | 426 +++++++++++++++++++++++++++++++++++++++++++ tests/tiling_nd.c | 274 +++++++++++++++++++++++++++- 5 files changed, 740 insertions(+), 9 deletions(-) create mode 100644 src/aml-tiling-pad.h create mode 100644 src/tiling_nd_pad.c diff --git a/src/Makefile.am b/src/Makefile.am index 7a2fa3e0..099f9b47 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -20,7 +20,8 @@ TILING_CSOURCES = tiling.c \ tiling_2d.c TILING_ND_CSOURCES = tiling_nd.c \ - tiling_nd_resize.c + tiling_nd_resize.c \ + tiling_nd_pad.c BINDING_CSOURCES = binding.c \ binding_single.c \ @@ -57,6 +58,7 @@ LIBHSOURCES = aml.h \ aml-layout-reshape.h \ aml-tiling.h \ aml-tiling-resize.h \ + aml-tiling-pad.h \ aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) diff --git a/src/aml-tiling-pad.h b/src/aml-tiling-pad.h new file mode 100644 index 00000000..46f23d86 --- /dev/null +++ b/src/aml-tiling-pad.h @@ -0,0 +1,44 @@ +#ifndef AML_TILING_PAD_H +#define AML_TILING_PAD_H + +#include + +struct aml_tiling_nd_data_pad { + const struct aml_layout *l; + size_t ndims; + size_t *tile_dims; + size_t *dims; + size_t *border_tile_dims; + size_t *pad; + void *neutral; +}; + +#define AML_TILING_PAD_ALLOCSIZE(ndims, neutral_size) ( \ + sizeof(struct aml_tiling_nd) + \ + sizeof(struct aml_tiling_nd_data_pad) + \ + (ndims * 4) * sizeof(size_t) + \ + neutral_size ) + +int aml_tiling_nd_pad_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *data); +int aml_tiling_nd_pad_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral); +int aml_tiling_nd_pad_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_pad_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); +int aml_tiling_nd_pad_acreate(struct aml_tiling_nd **t, uint64_t 
tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral); +int aml_tiling_nd_pad_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_pad_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); + +extern struct aml_tiling_nd_ops aml_tiling_nd_pad_column_ops; +extern struct aml_tiling_nd_ops aml_tiling_nd_pad_row_ops; + +#endif diff --git a/src/aml.h b/src/aml.h index 98e6c5c2..32b2230b 100644 --- a/src/aml.h +++ b/src/aml.h @@ -25,6 +25,7 @@ #include "aml-layout-reshape.h" #include "aml-tiling.h" #include "aml-tiling-resize.h" +#include "aml-tiling-pad.h" #include "aml-copy.h" /******************************************************************************* * Forward Declarations: diff --git a/src/tiling_nd_pad.c b/src/tiling_nd_pad.c new file mode 100644 index 00000000..8445ddc2 --- /dev/null +++ b/src/tiling_nd_pad.c @@ -0,0 +1,426 @@ +#include + +int aml_tiling_nd_pad_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *memory) +{ + struct aml_tiling_nd_data_pad *dataptr; + + assert(t == (struct aml_tiling_nd *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd)); + dataptr = memory; + t->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd_data_pad)); + dataptr->l = NULL; + dataptr->ndims = ndims; + dataptr->tile_dims = (size_t *)memory; + dataptr->dims = dataptr->tile_dims + ndims; + dataptr->border_tile_dims = dataptr->dims + ndims; + dataptr->pad = dataptr->border_tile_dims + ndims; + dataptr->neutral = (void *)(dataptr->pad + ndims); + return 0; +} + +int aml_tiling_nd_pad_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral) +{ + assert(t != NULL); + assert(t->data != NULL); + struct aml_tiling_nd_data_pad *data = + (struct aml_tiling_nd_data_pad *)t->data; + size_t 
element_size = aml_layout_element_size(l); + assert(data->ndims == ndims); + assert(data->tile_dims); + assert(data->dims); + assert(data->border_tile_dims); + assert(data->pad); + assert(data->neutral); + data->l = l; + int type = AML_TYPE_GET(tags, AML_TYPE_TILING_ORDER); + if (type == AML_TYPE_TILING_ROW_ORDER) { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_ROW_ORDER); + t->ops = &aml_tiling_nd_pad_row_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[ndims-i-1]; + } else { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_COLUMN_ORDER); + t->ops = &aml_tiling_nd_pad_column_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[i]; + } + size_t target_dims[ndims]; + l->ops->adims_column(l->data, target_dims); + for (size_t i = 0; i < ndims; i++) { + data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; + data->dims[i] = target_dims[i] / data->tile_dims[i]; + if (data->border_tile_dims[i] == 0) + data->border_tile_dims[i] = data->tile_dims[i]; + else { + data->dims[i] += 1; + data->pad[i] = 1; + } + } + memcpy(data->neutral, neutral, element_size); + return 0; +} + +int aml_tiling_nd_pad_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + size_t tile_dims[ndims]; + void *neutral; + for(size_t i = 0; i < ndims; i++) + tile_dims[i] = va_arg(data, size_t); + neutral = va_arg(data, void*); + return aml_tiling_nd_pad_ainit(t, tags, l, ndims, tile_dims, neutral); +} + +int aml_tiling_nd_pad_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_tiling_nd_pad_vinit(t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +int aml_tiling_nd_pad_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral) +{ + assert(ndims > 0); + size_t element_size = aml_layout_element_size(l); + void *baseptr = calloc(1, AML_TILING_PAD_ALLOCSIZE(ndims, + element_size)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_pad_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_pad_ainit(*t, tags, l, ndims, tile_dims, neutral); +} + +int aml_tiling_nd_pad_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + assert(ndims > 0); + size_t element_size = aml_layout_element_size(l); + void *baseptr = calloc(1, AML_TILING_PAD_ALLOCSIZE(ndims, + element_size)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_pad_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_pad_vinit(*t, tags, l, ndims, data); +} + +int aml_tiling_nd_pad_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + size_t element_size = aml_layout_element_size(l); + void *baseptr = calloc(1, AML_TILING_PAD_ALLOCSIZE(ndims, + element_size)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_pad_struct_init(*t, ndims, baseptr); + va_start(ap, ndims); + err = aml_tiling_nd_pad_vinit(*t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_pad_column_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for (size_t i = 0; i < ndims; i++) + assert(coords[i] < d->dims[i]); + for (size_t i = 0; i < ndims; i++) { + offsets[i] = coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + + int pad = 0; + for (size_t i = 0; i < ndims; i++) { + if (coords[i] == d->dims[i] - 1) { + dims[i] = d->border_tile_dims[i]; + if (d->pad[i]) + pad = 1; + } else + dims[i] = d->tile_dims[i]; + } + struct aml_layout *res = d->l->ops->aslice_column(d->l->data, offsets, + dims, strides); + if (pad) { + struct aml_layout *p_layout; + int order = aml_layout_order(d->l); + if (order == AML_TYPE_LAYOUT_COLUMN_ORDER) { + /* WARNING: OWNERSHIP!!! */ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_COLUMN_ORDER, + res, d->tile_dims, d->neutral); + } else { + size_t row_dims[ndims]; + for (size_t i = 0; i < ndims; i++) + row_dims[i] = d->tile_dims[i]; + /* WARNING: OWNERSHIP!!! 
*/ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_ROW_ORDER, + res, row_dims, d->neutral); + } + return p_layout; + } else + return res; +} + +struct aml_layout* +aml_tiling_nd_pad_column_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + size_t n_coords[d->ndims]; + for (size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_pad_column_aindex(l, n_coords); +} + +int +aml_tiling_nd_pad_column_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_COLUMN_ORDER; +} + +int +aml_tiling_nd_pad_column_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[i]; + } + return 0; +} + +int +aml_tiling_nd_pad_column_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + memcpy((void*)tile_dims, (void*)d->tile_dims, sizeof(size_t)*d->ndims); + return 0; +} + +int +aml_tiling_nd_pad_column_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int +aml_tiling_nd_pad_column_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t +aml_tiling_nd_pad_column_ndims(const struct 
aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_pad_column_ops = { + aml_tiling_nd_pad_column_index, + aml_tiling_nd_pad_column_aindex, + aml_tiling_nd_pad_column_order, + aml_tiling_nd_pad_column_tile_dims, + aml_tiling_nd_pad_column_tile_adims, + aml_tiling_nd_pad_column_dims, + aml_tiling_nd_pad_column_adims, + aml_tiling_nd_pad_column_ndims +}; + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_pad_row_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + + for(size_t i = 0; i < ndims; i++) + assert(coords[ndims - i - 1] < d->dims[i]); + for(size_t i = 0; i < ndims; i++) { + offsets[i] = coords[ndims - i - 1] * d->tile_dims[i]; + strides[i] = 1; + } + + int pad = 0; + for (size_t i = 0; i < ndims; i++) { + if (coords[ndims - i - 1] == d->dims[i] - 1) { + dims[i] = d->border_tile_dims[i]; + if (d->pad[i]) + pad = 1; + } else + dims[i] = d->tile_dims[i]; + } + struct aml_layout *res = d->l->ops->aslice_column(d->l->data, offsets, + dims, strides); + if (pad) { + struct aml_layout *p_layout; + int order = aml_layout_order(d->l); + if (order == AML_TYPE_LAYOUT_COLUMN_ORDER) { + /* WARNING: OWNERSHIP!!! */ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_COLUMN_ORDER, + res, d->tile_dims, d->neutral); + } else { + size_t row_dims[ndims]; + for (size_t i = 0; i < ndims; i++) + row_dims[i] = d->tile_dims[ndims - i - 1]; + /* WARNING: OWNERSHIP!!! 
*/ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_ROW_ORDER, + res, row_dims, d->neutral); + } + return p_layout; + } else + return res; +} + +struct aml_layout* +aml_tiling_nd_pad_row_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_pad_row_aindex(l, n_coords); +} + +int +aml_tiling_nd_pad_row_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_ROW_ORDER; +} + +int +aml_tiling_nd_pad_row_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_pad_row_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + tile_dims[i] = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_pad_row_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_pad_row_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 0; +} + +size_t 
+aml_tiling_nd_pad_row_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_pad_row_ops = { + aml_tiling_nd_pad_row_index, + aml_tiling_nd_pad_row_aindex, + aml_tiling_nd_pad_row_order, + aml_tiling_nd_pad_row_tile_dims, + aml_tiling_nd_pad_row_tile_adims, + aml_tiling_nd_pad_row_dims, + aml_tiling_nd_pad_row_adims, + aml_tiling_nd_pad_row_ndims +}; diff --git a/tests/tiling_nd.c b/tests/tiling_nd.c index f301a032..de3a8605 100644 --- a/tests/tiling_nd.c +++ b/tests/tiling_nd.c @@ -37,7 +37,7 @@ void test_tiling_even(void) aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, a, 3, dims_tile_col); aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, - a, 3, dims_tile_col); + ares, 3, dims_tile_col); assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); @@ -59,17 +59,25 @@ void test_tiling_even(void) free(b); free(bres); } - assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int))); + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); free(a); + free(ares); free(t); + free(tres); aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, sizeof(int), 3, dims_row, stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, a, 3, dims_tile_row); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row); assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); assert(aml_tiling_nd_ndims(t) == 3); @@ -89,15 +97,17 @@ void test_tiling_even(void) for(size_t k = 0; k < expected_dims_col[0]; k++) { struct aml_layout *b, *bres; b = aml_tiling_nd_index(t, i, j, k); - bres = aml_tiling_nd_index(tres, k, j, i); + bres = aml_tiling_nd_index(tres, i, j, k); 
aml_copy_layout_generic(bres, b); free(b); free(bres); } - assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int))); + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); free(a); + free(ares); free(t); + free(tres); } @@ -105,7 +115,7 @@ void test_tiling_uneven(void) { int memory[8][10][7]; - int memoryres[9][10][8]; + int memoryres[8][10][7]; size_t dims_col[3] = {7, 10, 8}; size_t dims_row[3] = {8, 10, 7}; @@ -138,7 +148,7 @@ void test_tiling_uneven(void) aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, a, 3, dims_tile_col); aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, - a, 3, dims_tile_col); + ares, 3, dims_tile_col); assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); @@ -160,17 +170,24 @@ void test_tiling_uneven(void) free(b); free(bres); } - assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int))); + assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int)) == 0); free(a); + free(ares); free(t); + free(tres); aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, sizeof(int), 3, dims_row, stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, a, 3, dims_tile_row); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row); assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); assert(aml_tiling_nd_ndims(t) == 3); @@ -190,15 +207,254 @@ void test_tiling_uneven(void) for(size_t k = 0; k < expected_dims_col[0]; k++) { struct aml_layout *b, *bres; b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + +} + +void test_tiling_pad_even(void) +{ + int memory[9][10][8]; + int 
memoryres[9][10][8]; + size_t dims_col[3] = {8, 10, 9}; + size_t dims_row[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0.0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col, + stride, dims_col); + + + struct aml_tiling_nd *t, *tres; + int neutral = 0xdeadbeef; + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col, &neutral); + aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col, &neutral); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_col, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + 
(void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row, &neutral); + aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row, &neutral); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + +} + +void test_tiling_pad_uneven(void) +{ + + int memory[8][10][7]; + int memoryres[9][10][8]; + size_t dims_col[3] = {7, 10, 8}; + size_t dims_row[3] = {8, 10, 7}; + size_t dims_col_res[3] = {8, 10, 9}; + size_t dims_row_res[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 8; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 7; k++, l++) + memory[i][j][k] = l; + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, 
sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col_res, + stride, dims_col_res); + + + struct aml_tiling_nd *t, *tres; + int neutral = 0xdeadbeef; + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col, &neutral); + aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col, &neutral); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_col, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); bres = aml_tiling_nd_index(tres, k, j, i); aml_copy_layout_generic(bres, b); free(b); free(bres); } - assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int))); + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + if( k >= 7 || i >= 8) + assert(memoryres[i][j][k] == 0xdeadbeef); + else + assert(memoryres[i][j][k] == memory[i][j][k]); + + free(a); + free(t); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row_res, + stride, dims_row_res); + + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row, &neutral); + aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row, &neutral); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + 
aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + if( k >= 7 || i >= 8) + assert(memoryres[i][j][k] == 0xdeadbeef); + else + assert(memoryres[i][j][k] == memory[i][j][k]); free(a); + free(ares); free(t); + free(tres); } @@ -209,6 +465,8 @@ int main(int argc, char *argv[]) test_tiling_even(); test_tiling_uneven(); + test_tiling_pad_even(); + test_tiling_pad_uneven(); return 0; } From ea73d4fe8651157eb993c6f2386ee4025a9cb9a9 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Tue, 29 Jan 2019 15:45:03 -0600 Subject: [PATCH 42/47] Exposed layout column api. 
--- src/aml-layout.h | 7 +++++++ src/layout.c | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/aml-layout.h b/src/aml-layout.h index 21a647c3..44327472 100644 --- a/src/aml-layout.h +++ b/src/aml-layout.h @@ -62,9 +62,12 @@ struct aml_layout { void *aml_layout_deref(const struct aml_layout *l, ...); void *aml_layout_aderef(const struct aml_layout *l, const size_t *coords); +void *aml_layout_aderef_column(const struct aml_layout *l, + const size_t *coords); int aml_layout_order(const struct aml_layout *l); int aml_layout_dims(const struct aml_layout *l, ...); int aml_layout_adims(const struct aml_layout *l, size_t *dims); +int aml_layout_adims_column(const struct aml_layout *l, size_t *dims); size_t aml_layout_ndims(const struct aml_layout *l); size_t aml_layout_element_size(const struct aml_layout *l); struct aml_layout * aml_layout_areshape(const struct aml_layout *l, @@ -75,5 +78,9 @@ struct aml_layout * aml_layout_slice(const struct aml_layout *l, ...); struct aml_layout * aml_layout_aslice(const struct aml_layout *l, const size_t *offsets, const size_t *dims, const size_t *strides); +struct aml_layout * aml_layout_aslice_column(const struct aml_layout *l, + const size_t *offsets, + const size_t *dims, + const size_t *strides); #endif diff --git a/src/layout.c b/src/layout.c index 635b4759..0d550fa2 100644 --- a/src/layout.c +++ b/src/layout.c @@ -23,6 +23,14 @@ void *aml_layout_aderef(const struct aml_layout *layout, const size_t *coords) return layout->ops->aderef(layout->data, coords); } +void *aml_layout_aderef_column(const struct aml_layout *layout, + const size_t *coords) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->aderef_column(layout->data, coords); +} + int aml_layout_order(const struct aml_layout *layout) { assert(layout != NULL); @@ -49,6 +57,13 @@ int aml_layout_adims(const struct aml_layout *layout, size_t *dims) return layout->ops->adims(layout->data, dims); } +int 
aml_layout_adims_column(const struct aml_layout *layout, size_t *dims) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->adims_column(layout->data, dims); +} + size_t aml_layout_ndims(const struct aml_layout *layout) { assert(layout != NULL); @@ -110,3 +125,14 @@ struct aml_layout * aml_layout_aslice(const struct aml_layout *layout, assert(layout->ops->aslice != NULL); return layout->ops->aslice(layout->data, offsets, dims, strides); } + +struct aml_layout * aml_layout_aslice_column(const struct aml_layout *layout, + const size_t *offsets, + const size_t *dims, + const size_t *strides) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->aslice != NULL); + return layout->ops->aslice_column(layout->data, offsets, dims, strides); +} From dccfebef7b7a84a9d8329479fdfc64b71d1fa2f4 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Tue, 29 Jan 2019 15:46:01 -0600 Subject: [PATCH 43/47] Use column api for copy operators and better checks of compatibility. 
--- src/copy.c | 24 ++++++++++++++++++------ src/copy.rb | 11 +++++++++-- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/copy.c b/src/copy.c index 4fcd4d0d..0f3f37bf 100644 --- a/src/copy.c +++ b/src/copy.c @@ -535,8 +535,9 @@ static inline void aml_copy_layout_generic_helper(size_t d, for (size_t i = 0; i < elem_number[0]; i += 1) { coords[0] = i; coords[0] = i; - memcpy(aml_layout_aderef(dst, coords), - aml_layout_aderef(src, coords), elem_size); + memcpy(aml_layout_aderef_column(dst, coords), + aml_layout_aderef_column(src, coords), + elem_size); } else for (size_t i = 0; i < elem_number[d - 1]; i += 1) { coords[d - 1] = i; @@ -564,8 +565,9 @@ static inline void aml_copy_layout_transform_generic_helper(size_t d, for (size_t i = 0; i < elem_number[target_dims[0]]; i += 1) { coords_out[0] = i; coords[target_dims[0]] = i; - memcpy(aml_layout_aderef(dst, coords_out), - aml_layout_aderef(src, coords), elem_size); + memcpy(aml_layout_aderef_column(dst, coords_out), + aml_layout_aderef_column(src, coords), + elem_size); } else for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { coords_out[d - 1] = i; @@ -587,13 +589,18 @@ int aml_copy_layout_generic(struct aml_layout *dst, size_t elem_size; size_t *coords; size_t *elem_number; + size_t *elem_number2; assert(aml_layout_ndims(dst) == aml_layout_ndims(src)); d = aml_layout_ndims(dst); assert(aml_layout_element_size(dst) == aml_layout_element_size(src)); elem_size = aml_layout_element_size(dst); coords = (size_t *) alloca(d * sizeof(size_t)); elem_number = (size_t *) alloca(d * sizeof(size_t)); - aml_layout_adims(src, elem_number); + elem_number2 = (size_t *) alloca(d * sizeof(size_t)); + aml_layout_adims_column(src, elem_number); + aml_layout_adims_column(dst, elem_number2); + for (size_t i = 0; i < d; i += 1) + assert(elem_number[i] == elem_number2[i]); aml_copy_layout_generic_helper(d, dst, src, elem_number, elem_size, coords); return 0; @@ -608,6 +615,7 @@ int 
aml_copy_layout_transform_generic(struct aml_layout *dst, size_t *coords; size_t *coords_out; size_t *elem_number; + size_t *elem_number2; assert(aml_layout_ndims(dst) == aml_layout_ndims(src)); d = aml_layout_ndims(dst); assert(aml_layout_element_size(dst) == aml_layout_element_size(src)); @@ -615,7 +623,11 @@ int aml_copy_layout_transform_generic(struct aml_layout *dst, coords = (size_t *) alloca(d * sizeof(size_t)); coords_out = (size_t *) alloca(d * sizeof(size_t)); elem_number = (size_t *) alloca(d * sizeof(size_t)); - aml_layout_adims(src, elem_number); + elem_number2 = (size_t *) alloca(d * sizeof(size_t)); + aml_layout_adims_column(src, elem_number); + aml_layout_adims_column(dst, elem_number2); + for (size_t i = 0; i < d; i += 1) + assert(elem_number[target_dims[i]] == elem_number2[i]); aml_copy_layout_transform_generic_helper(d, dst, src, elem_number, elem_size, coords, coords_out, target_dims); diff --git a/src/copy.rb b/src/copy.rb index e38e1b8c..e276ed54 100644 --- a/src/copy.rb +++ b/src/copy.rb @@ -399,7 +399,7 @@ def aml_copy_layout_generic_helper(shuffle: false) pr For( i, 0, elem_number[elem_index[0]], operator: '<', declit: true ) { pr coord_dst[dst_index[0]] === i pr coord_src[src_index[0]] === i - pr memcpy( FuncCall(:aml_layout_aderef, dst, coord_dst), FuncCall(:aml_layout_aderef, src, coord_src), elem_size ) + pr memcpy( FuncCall(:aml_layout_aderef_column, dst, coord_dst), FuncCall(:aml_layout_aderef_column, src, coord_src), elem_size ) } }, else: lambda { pr For( i, 0, elem_number[elem_index[d - 1]], operator: '<', declit: true ) { @@ -465,9 +465,11 @@ def aml_copy_layout(native: true, shuffle: false) coords = Sizet :coords, dim: Dim() coords_out = Sizet :coords_out, dim: Dim() elem_number = Sizet :elem_number, dim: Dim() + elem_number2 = Sizet :elem_number2, dim: Dim() decl coords decl coords_out if shuffle decl elem_number + decl elem_number2 pr assert( FuncCall( :aml_layout_ndims, dst ) == FuncCall( :aml_layout_ndims, src ) ) pr d === 
FuncCall( :aml_layout_ndims, dst ) @@ -476,7 +478,12 @@ def aml_copy_layout(native: true, shuffle: false) pr coords === alloca(d * sizeof("size_t")).cast(coords) pr coords_out === alloca(d * sizeof("size_t")).cast(coords_out) if shuffle pr elem_number === alloca(d * sizeof("size_t")).cast(elem_number) - pr FuncCall( :aml_layout_adims, src, elem_number ) + pr elem_number2 === alloca(d * sizeof("size_t")).cast(elem_number2) + pr FuncCall( :aml_layout_adims_column, src, elem_number ) + pr FuncCall( :aml_layout_adims_column, dst, elem_number2 ) + pr For(i, 0, d, operator: '<', declit: true) { + pr assert( "#{elem_number}[#{src_index[i]}] == #{elem_number2}[#{dst_index[i]}]" ) + } new_args = [d, dst, src, elem_number, elem_size, coords] new_args << coords_out << target_dims if shuffle From ff26f8c25e704ed33a87f7f24f33c8ed4070aee2 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Tue, 29 Jan 2019 15:52:27 -0600 Subject: [PATCH 44/47] Test mixing row and column layout/tiling/copy. --- tests/tiling_nd.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tests/tiling_nd.c b/tests/tiling_nd.c index de3a8605..614da70c 100644 --- a/tests/tiling_nd.c +++ b/tests/tiling_nd.c @@ -1,6 +1,99 @@ #include #include +void test_tiling_even_mixed(void) +{ + int memory[9][10][8]; + int memoryres[9][10][8]; + size_t dims_col[3] = {8, 10, 9}; + size_t dims_row[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0.0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, 
AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row); + + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col); + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + +} + void test_tiling_even(void) { int memory[9][10][8]; @@ -465,6 +558,7 @@ int main(int argc, char *argv[]) test_tiling_even(); test_tiling_uneven(); + test_tiling_even_mixed(); 
test_tiling_pad_even(); test_tiling_pad_uneven(); From 0a4a5bf374b19fc0cb34ad077036a7b56b011559 Mon Sep 17 00:00:00 2001 From: Swann Perarnau Date: Wed, 30 Jan 2019 17:25:37 -0600 Subject: [PATCH 45/47] [feature] double dma/layout scratch WIP: - missing key management to figure out which tile to retrieve in scratch - missing tests Introduce a NOOP request type too, to deal with operations that don't trigger movements. --- src/Makefile.am | 3 +- src/aml-scratch-double.h | 56 ++++++++ src/aml.h | 3 + src/scratch_double.c | 287 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 src/aml-scratch-double.h create mode 100644 src/scratch_double.c diff --git a/src/Makefile.am b/src/Makefile.am index 099f9b47..cdcded08 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -34,7 +34,8 @@ DMA_CSOURCES = dma.c \ SCRATCH_CSOURCES = scratch.c \ scratch_seq.c \ - scratch_par.c + scratch_par.c \ + scratch_double.c UTILS_CSOURCES = vector.c diff --git a/src/aml-scratch-double.h b/src/aml-scratch-double.h new file mode 100644 index 00000000..791dcecf --- /dev/null +++ b/src/aml-scratch-double.h @@ -0,0 +1,56 @@ +#ifndef AML_SCRATCH_DOUBLE_H +#define AML_SCRATCH_DOUBLE_H 1 + +/******************************************************************************* + * Sequential scratchpad API: + * Scratchpad uses calling thread to trigger asynchronous dma movements. 
+ ******************************************************************************/ + +extern struct aml_scratch_ops aml_scratch_double_ops; + +struct aml_scratch_request_double { + int type; + struct aml_dma *dma; + struct aml_layout *src; + int srcid; + struct aml_layout *dest; + int dstid; + pthread_t thread; +}; + +struct aml_scratch_double_data { + struct aml_tiling_nd *src_tiling; + struct aml_tiling_nd *dest_tiling; + struct aml_dma *push_dma; + struct aml_dma *pull_dma; + struct aml_vector tilemap; + struct aml_vector requests; + pthread_mutex_t lock; +}; + +struct aml_scratch_double_ops { + void *(*do_thread)(void *); +}; + +struct aml_scratch_double { + struct aml_scratch_double_ops ops; + struct aml_scratch_double_data data; +}; + +#define AML_SCRATCH_DOUBLE_DECL(name) \ + struct aml_scratch_double __ ##name## _inner_data; \ + struct aml_scratch name = { \ + &aml_scratch_double_ops, \ + (struct aml_scratch_data *)&__ ## name ## _inner_data, \ + }; + +#define AML_SCRATCH_DOUBLE_ALLOCSIZE \ + (sizeof(struct aml_scratch_double) + \ + sizeof(struct aml_scratch)) + +int aml_scratch_double_create(struct aml_scratch **scratch, ...); +int aml_scratch_double_init(struct aml_scratch *scratch, ...); +int aml_scratch_double_vinit(struct aml_scratch *scratch, va_list args); +int aml_scratch_double_destroy(struct aml_scratch *scratch); + +#endif diff --git a/src/aml.h b/src/aml.h index 32b2230b..1cedde4f 100644 --- a/src/aml.h +++ b/src/aml.h @@ -1492,6 +1492,8 @@ struct aml_scratch_data; #define AML_SCRATCH_REQUEST_TYPE_PUSH 0 /* Pull from regular memory to the scratchpad. 
*/ #define AML_SCRATCH_REQUEST_TYPE_PULL 1 +/* No-op/empty request */ +#define AML_SCRATCH_REQUEST_TYPE_NOOP 2 struct aml_scratch_ops { int (*create_request)(struct aml_scratch_data *scratch, @@ -1593,6 +1595,7 @@ void* aml_scratch_baseptr(const struct aml_scratch *scratch); */ int aml_scratch_release(struct aml_scratch *scratch, int scratchid); +#include "aml-scratch-double.h" /******************************************************************************* * Sequential scratchpad API: * Scratchpad uses calling thread to trigger asynchronous dma movements. diff --git a/src/scratch_double.c b/src/scratch_double.c new file mode 100644 index 00000000..8806e7cf --- /dev/null +++ b/src/scratch_double.c @@ -0,0 +1,287 @@ +#include +#include + +/******************************************************************************* + * Requests: + ******************************************************************************/ + +int aml_scratch_request_double_init(struct aml_scratch_request_double *req, + int type, struct aml_dma *dma, + struct aml_layout *dl, int dstid, + struct aml_layout *sl, int srcid) + +{ + assert(req != NULL); + req->type = type; + req->dma = dma; + req->dest = dl; + req->dstid = dstid; + req->src = sl; + req->srcid = srcid; + return 0; +} + +int aml_scratch_request_double_destroy(struct aml_scratch_request_double *r) +{ + assert(r != NULL); + return 0; +} + +/******************************************************************************* + * Internal functions + ******************************************************************************/ +void *aml_scratch_double_do_thread(void *arg) +{ + struct aml_scratch_request_double *req = + (struct aml_scratch_request_double *)arg; + + aml_dma_copy(req->dma, req->dest, req->src); +} + +struct aml_scratch_double_ops aml_scratch_double_inner_ops = { + aml_scratch_double_do_thread, +}; + +/******************************************************************************* + * Public API + 
******************************************************************************/ + +int aml_scratch_double_create_request(struct aml_scratch_data *d, + struct aml_scratch_request **r, + int type, va_list ap) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_scratch_double *scratch = + (struct aml_scratch_double *)d; + + struct aml_scratch_request_double *req; + + pthread_mutex_lock(&scratch->data.lock); + req = aml_vector_add(&scratch->data.requests); + /* init the request */ + if(type == AML_SCRATCH_REQUEST_TYPE_PUSH) + { + struct aml_layout *scratch_layout; + struct aml_layout *src_layout; + int *src_uid; + int scratch_uid; + + src_layout = va_arg(ap, struct aml_layout *); + src_uid = va_arg(ap, int *); + scratch_layout = va_arg(ap, struct aml_layout *); + scratch_uid = va_arg(ap, int); + + /* find destination tile */ + int *slot = aml_vector_get(&scratch->data.tilemap, scratch_uid); + assert(slot != NULL); + *src_uid = *slot; + + /* init request */ + aml_scratch_request_double_init(req, type, + scratch->data.push_dma, + src_layout, *src_uid, + scratch_layout, scratch_uid); + } + else if(type == AML_SCRATCH_REQUEST_TYPE_PULL) + { + struct aml_layout **scratch_layout; + struct aml_layout *src_layout; + int *scratch_uid; + int src_uid; + + scratch_layout = va_arg(ap, struct aml_layout **); + scratch_uid = va_arg(ap, int *); + src_layout = va_arg(ap, struct aml_layout *); + src_uid = va_arg(ap, int); + + /* find scratchination tile + * We don't use add here because adding a tile means allocating + * new tiles on the sch_area too. 
*/
+		int slot = aml_vector_find(&scratch->data.tilemap, src_uid);
+		if(slot == -1)
+		{
+			/* create a new request */
+			slot = aml_vector_find(&scratch->data.tilemap, -1);
+			assert(slot != -1);
+			int *tile = aml_vector_get(&scratch->data.tilemap, slot);
+			*tile = src_uid;
+		}
+		else
+			type = AML_SCRATCH_REQUEST_TYPE_NOOP;
+
+		/* save the key */
+		*scratch_uid = slot;
+		// *scratch_layout = aml_tiling_nd_get(scratch->data.scratch_tiling)
+
+		/* init request */
+		aml_scratch_request_double_init(req, type,
+						scratch->data.pull_dma,
+						*scratch_layout, slot,
+						src_layout, src_uid);
+	}
+	pthread_mutex_unlock(&scratch->data.lock);
+	/* thread creation */
+	if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
+	{
+		pthread_create(&req->thread, NULL, scratch->ops.do_thread, req);
+	}
+	*r = (struct aml_scratch_request *)req;
+	return 0;
+}
+
+int aml_scratch_double_destroy_request(struct aml_scratch_data *d,
+				       struct aml_scratch_request *r)
+{
+	assert(d != NULL);
+	assert(r != NULL);
+	struct aml_scratch_double *scratch =
+		(struct aml_scratch_double *)d;
+	struct aml_scratch_request_double *req =
+		(struct aml_scratch_request_double *)r;
+	int *tile = NULL; /* stays NULL for NOOP requests: no tile to drop */
+
+	if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
+	{
+		pthread_cancel(req->thread);
+		pthread_join(req->thread, NULL);
+	}
+
+	aml_scratch_request_double_destroy(req);
+
+	/* destroy removes the tile from the scratch */
+	pthread_mutex_lock(&scratch->data.lock);
+	if(req->type == AML_SCRATCH_REQUEST_TYPE_PUSH)
+		tile = aml_vector_get(&scratch->data.tilemap,req->srcid);
+	else if(req->type == AML_SCRATCH_REQUEST_TYPE_PULL)
+		tile = aml_vector_get(&scratch->data.tilemap,req->dstid);
+	if(tile != NULL)
+		aml_vector_remove(&scratch->data.tilemap, tile);
+	aml_vector_remove(&scratch->data.requests, req);
+	pthread_mutex_unlock(&scratch->data.lock);
+	return 0;
+}
+
+int aml_scratch_double_wait_request(struct aml_scratch_data *d,
+				    struct aml_scratch_request *r)
+{
+	assert(d != NULL);
+	assert(r != NULL);
+	struct aml_scratch_double *scratch = (struct
aml_scratch_double *)d; + struct aml_scratch_request_double *req = + (struct aml_scratch_request_double *)r; + int *tile; + + /* wait for completion of the request */ + if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP) + pthread_join(req->thread, NULL); + + /* cleanup a completed request. In case of push, free up the tile */ + aml_scratch_request_double_destroy(req); + pthread_mutex_lock(&scratch->data.lock); + if(req->type == AML_SCRATCH_REQUEST_TYPE_PUSH) + { + tile = aml_vector_get(&scratch->data.tilemap,req->srcid); + aml_vector_remove(&scratch->data.tilemap, tile); + } + aml_vector_remove(&scratch->data.requests, req); + pthread_mutex_unlock(&scratch->data.lock); + return 0; +} + +void *aml_scratch_double_baseptr(const struct aml_scratch_data *d) +{ + assert(d != NULL); + // don't think this function makes sense for this implementation. + return NULL; +} + +int aml_scratch_double_release(struct aml_scratch_data *d, int scratchid) +{ + assert(d != NULL); + struct aml_scratch_double *scratch = (struct aml_scratch_double *)d; + int *tile; + + pthread_mutex_lock(&scratch->data.lock); + tile = aml_vector_get(&scratch->data.tilemap, scratchid); + if(tile != NULL) + aml_vector_remove(&scratch->data.tilemap, tile); + pthread_mutex_unlock(&scratch->data.lock); + return 0; +} + +struct aml_scratch_ops aml_scratch_double_ops = { + aml_scratch_double_create_request, + aml_scratch_double_destroy_request, + aml_scratch_double_wait_request, + aml_scratch_double_baseptr, + aml_scratch_double_release, +}; + +/******************************************************************************* + * Init functions: + ******************************************************************************/ + +int aml_scratch_double_create(struct aml_scratch **d, ...) 
+{ + va_list ap; + struct aml_scratch *ret = NULL; + intptr_t baseptr, dataptr; + va_start(ap, d); + + /* alloc */ + baseptr = (intptr_t) calloc(1, AML_SCRATCH_DOUBLE_ALLOCSIZE); + dataptr = baseptr + sizeof(struct aml_scratch); + + ret = (struct aml_scratch *)baseptr; + ret->data = (struct aml_scratch_data *)dataptr; + + aml_scratch_double_vinit(ret, ap); + + va_end(ap); + *d = ret; + return 0; +} +int aml_scratch_double_vinit(struct aml_scratch *d, va_list ap) +{ + d->ops = &aml_scratch_double_ops; + struct aml_scratch_double *scratch = (struct aml_scratch_double *)d->data; + + scratch->ops = aml_scratch_double_inner_ops; + + scratch->data.dest_tiling = va_arg(ap, struct aml_tiling_nd *); + scratch->data.src_tiling = va_arg(ap, struct aml_tiling_nd *); + scratch->data.push_dma = va_arg(ap, struct aml_dma *); + scratch->data.pull_dma = va_arg(ap, struct aml_dma *); + size_t nbtiles = va_arg(ap, size_t); + size_t nbreqs = va_arg(ap, size_t); + + /* allocate request array */ + aml_vector_init(&scratch->data.requests, nbreqs, + sizeof(struct aml_scratch_request_double), + offsetof(struct aml_scratch_request_double, type), + AML_SCRATCH_REQUEST_TYPE_INVALID); + + /* scratch init */ + aml_vector_init(&scratch->data.tilemap, nbtiles, sizeof(int), 0, -1); + pthread_mutex_init(&scratch->data.lock, NULL); + return 0; +} +int aml_scratch_double_init(struct aml_scratch *d, ...) +{ + int err; + va_list ap; + va_start(ap, d); + err = aml_scratch_double_vinit(d, ap); + va_end(ap); + return err; +} + +int aml_scratch_double_destroy(struct aml_scratch *d) +{ + struct aml_scratch_double *scratch = (struct aml_scratch_double *)d->data; + aml_vector_destroy(&scratch->data.requests); + aml_vector_destroy(&scratch->data.tilemap); + pthread_mutex_destroy(&scratch->data.lock); + return 0; +} From a7b9a1049768e4797fb1b22cf17e3bb896434776 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 4 Feb 2019 15:22:05 -0600 Subject: [PATCH 46/47] Added tiling collapsing unused dimensions. 
--- src/Makefile.am | 4 +- src/aml-tiling-collapse.h | 40 ++++ src/aml.h | 1 + src/tiling_nd_collapse.c | 385 ++++++++++++++++++++++++++++++++++++++ tests/tiling_nd.c | 57 +++++- 5 files changed, 485 insertions(+), 2 deletions(-) create mode 100644 src/aml-tiling-collapse.h create mode 100644 src/tiling_nd_collapse.c diff --git a/src/Makefile.am b/src/Makefile.am index cdcded08..b945d5c0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -21,7 +21,8 @@ TILING_CSOURCES = tiling.c \ TILING_ND_CSOURCES = tiling_nd.c \ tiling_nd_resize.c \ - tiling_nd_pad.c + tiling_nd_pad.c \ + tiling_nd_collapse.c BINDING_CSOURCES = binding.c \ binding_single.c \ @@ -60,6 +61,7 @@ LIBHSOURCES = aml.h \ aml-tiling.h \ aml-tiling-resize.h \ aml-tiling-pad.h \ + aml-tiling-collapse.h \ aml-copy.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) diff --git a/src/aml-tiling-collapse.h b/src/aml-tiling-collapse.h new file mode 100644 index 00000000..348ea0b6 --- /dev/null +++ b/src/aml-tiling-collapse.h @@ -0,0 +1,40 @@ +#ifndef AML_TILING_COLLAPSE_H +#define AML_TILING_COLLAPSE_H + +#include + +struct aml_tiling_nd_data_collapse { + const struct aml_layout *l; + size_t ndims; + size_t *tile_dims; + size_t *dims; + size_t *border_tile_dims; +}; + +#define AML_TILING_COLLAPSE_ALLOCSIZE(ndims) (sizeof(struct aml_tiling_nd) +\ + sizeof(struct aml_tiling_nd_data_collapse) +\ + (ndims * 3) * sizeof(size_t)) + +int aml_tiling_nd_collapse_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *data); +int aml_tiling_nd_collapse_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_collapse_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_collapse_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); +int aml_tiling_nd_collapse_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct 
aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_collapse_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_collapse_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); + +extern struct aml_tiling_nd_ops aml_tiling_nd_collapse_column_ops; +extern struct aml_tiling_nd_ops aml_tiling_nd_collapse_row_ops; + +#endif diff --git a/src/aml.h b/src/aml.h index 1cedde4f..8b976419 100644 --- a/src/aml.h +++ b/src/aml.h @@ -26,6 +26,7 @@ #include "aml-tiling.h" #include "aml-tiling-resize.h" #include "aml-tiling-pad.h" +#include "aml-tiling-collapse.h" #include "aml-copy.h" /******************************************************************************* * Forward Declarations: diff --git a/src/tiling_nd_collapse.c b/src/tiling_nd_collapse.c new file mode 100644 index 00000000..c33d5a90 --- /dev/null +++ b/src/tiling_nd_collapse.c @@ -0,0 +1,385 @@ +#include + +int aml_tiling_nd_collapse_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *memory) +{ + struct aml_tiling_nd_data_collapse *dataptr; + + assert(t == (struct aml_tiling_nd *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd)); + dataptr = memory; + t->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd_data_collapse)); + dataptr->l = NULL; + dataptr->ndims = ndims; + dataptr->tile_dims = (size_t *)memory; + dataptr->dims = dataptr->tile_dims + ndims; + dataptr->border_tile_dims = dataptr->dims + ndims; + return 0; +} + +int aml_tiling_nd_collapse_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(t != NULL); + assert(t->data != NULL); + struct aml_tiling_nd_data_collapse *data = + (struct aml_tiling_nd_data_collapse *)t->data; + assert(data->ndims == ndims); + assert(data->tile_dims); + assert(data->dims); + 
assert(data->border_tile_dims); + data->l = l; + int type = AML_TYPE_GET(tags, AML_TYPE_TILING_ORDER); + if (type == AML_TYPE_TILING_ROW_ORDER) { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_ROW_ORDER); + t->ops = &aml_tiling_nd_collapse_row_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[ndims-i-1]; + } else { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_COLUMN_ORDER); + t->ops = &aml_tiling_nd_collapse_column_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[i]; + } + size_t target_dims[ndims]; + l->ops->adims_column(l->data, target_dims); + for (size_t i = 0; i < ndims; i++) { + data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; + data->dims[i] = target_dims[i] / data->tile_dims[i]; + if (data->border_tile_dims[i] == 0) + data->border_tile_dims[i] = data->tile_dims[i]; + else + data->dims[i] += 1; + } + return 0; +} + +int aml_tiling_nd_collapse_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + size_t tile_dims[ndims]; + for(size_t i = 0; i < ndims; i++) + tile_dims[i] = va_arg(data, size_t); + return aml_tiling_nd_collapse_ainit(t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_collapse_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_tiling_nd_collapse_vinit(t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +int aml_tiling_nd_collapse_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_COLLAPSE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_collapse_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_collapse_ainit(*t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_collapse_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_COLLAPSE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_collapse_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_collapse_vinit(*t, tags, l, ndims, data); +} + +int aml_tiling_nd_collapse_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_COLLAPSE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_collapse_struct_init(*t, ndims, baseptr); + va_start(ap, ndims); + err = aml_tiling_nd_collapse_vinit(*t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_collapse_column_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t new_coords[ndims]; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for(size_t i = 0, j = 0; i < ndims; i++) + if (d->dims[i] > 1) { + assert(coords[j] < d->dims[i]); + new_coords[i] = coords[j]; + j++; + } else + new_coords[i] = 0; + for(size_t i = 0; i < ndims; i++) { + offsets[i] = new_coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (new_coords[i] == d->dims[i] - 1 ? 
+ d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_collapse_column_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0, j = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + n_coords[j++] = va_arg(coords, size_t); + return aml_tiling_nd_collapse_column_aindex(l, n_coords); +} + +int +aml_tiling_nd_collapse_column_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_COLUMN_ORDER; +} + +int +aml_tiling_nd_collapse_column_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[i]; + } + return 0; +} + +int +aml_tiling_nd_collapse_column_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + memcpy((void*)tile_dims, (void*)d->tile_dims, sizeof(size_t)*d->ndims); + return 0; +} + +int +aml_tiling_nd_collapse_column_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + if (d->dims[i] > 1) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + } + return 0; +} + +int +aml_tiling_nd_collapse_column_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0, j = 0; 
i < d->ndims; i++) + if (d->dims[i] > 1) + dims[j++] = d->dims[i]; + return 0; +} + +size_t +aml_tiling_nd_collapse_column_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = 0; + for(size_t i = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + ndims++; + return ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_collapse_column_ops = { + aml_tiling_nd_collapse_column_index, + aml_tiling_nd_collapse_column_aindex, + aml_tiling_nd_collapse_column_order, + aml_tiling_nd_collapse_column_tile_dims, + aml_tiling_nd_collapse_column_tile_adims, + aml_tiling_nd_collapse_column_dims, + aml_tiling_nd_collapse_column_adims, + aml_tiling_nd_collapse_column_ndims +}; + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_collapse_row_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t new_coords[ndims]; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + + for(size_t i = 0, j = 0; i < ndims; i++) + if (d->dims[ndims - i - 1] > 1) { + assert(coords[j] < d->dims[ndims - i - 1]); + new_coords[ndims - i - 1] = coords[j]; + j++; + } else + new_coords[ndims - i - 1] = 0; + for(size_t i = 0; i < ndims; i++) { + + offsets[i] = new_coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (new_coords[i] == d->dims[i] - 1 ? 
+ d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_collapse_row_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0, j = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + n_coords[j++] = va_arg(coords, size_t); + return aml_tiling_nd_collapse_row_aindex(l, n_coords); +} + +int +aml_tiling_nd_collapse_row_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_ROW_ORDER; +} + +int +aml_tiling_nd_collapse_row_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_collapse_row_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + tile_dims[i] = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_collapse_row_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + if (d->dims[i] > 1) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + } + return 0; +} + +int +aml_tiling_nd_collapse_row_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + 
for(size_t i = 0, j = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + dims[j++] = d->dims[d->ndims - i - 1]; + return 0; +} + +size_t +aml_tiling_nd_collapse_row_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = 0; + for(size_t i = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + ndims++; + return ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_collapse_row_ops = { + aml_tiling_nd_collapse_row_index, + aml_tiling_nd_collapse_row_aindex, + aml_tiling_nd_collapse_row_order, + aml_tiling_nd_collapse_row_tile_dims, + aml_tiling_nd_collapse_row_tile_adims, + aml_tiling_nd_collapse_row_dims, + aml_tiling_nd_collapse_row_adims, + aml_tiling_nd_collapse_row_ndims +}; diff --git a/tests/tiling_nd.c b/tests/tiling_nd.c index 614da70c..e8f1d4d8 100644 --- a/tests/tiling_nd.c +++ b/tests/tiling_nd.c @@ -1,6 +1,60 @@ #include #include +void test_tiling_collapse(void) +{ + int memory[9][8][4][3]; + int memoryres[9][8][4][3]; + size_t dims_col[4] = {3, 4, 8, 9}; + size_t dims_row[4] = {9, 8, 4, 3}; + + size_t stride[4] = {1, 1, 1, 1}; + size_t dims_tile_col[4] = {1, 4, 1, 9}; + size_t dims_tile_row[4] = {9, 1, 4, 1}; + + size_t expected_dims_col[2] = {3, 8}; + size_t expected_dims_row[2] = {8, 3}; + + int n = 0; + for(size_t i = 0; i < 9; i++) + for(size_t k = 0; k < 8; k++) + for(size_t l = 0; l < 4; l++) + for(size_t m = 0; m < 3; m++, n++) { + memory[i][k][l][m] = n; + memoryres[i][k][l][m] = 0; + } + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 4, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 4, dims_row, + stride, dims_row); + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_collapse_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 4, dims_tile_col); + aml_tiling_nd_collapse_acreate(&tres, 
AML_TYPE_TILING_ROW_ORDER, + ares, 4, dims_tile_row); + + + for(size_t i = 0; i < expected_dims_col[1]; i++) + for(size_t j = 0; j < expected_dims_col[0]; j++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, j, i); + bres = aml_tiling_nd_index(tres, i, j); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 9 * 4 * 3 * sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); +} + void test_tiling_even_mixed(void) { int memory[9][10][8]; @@ -21,7 +75,7 @@ void test_tiling_even_mixed(void) for(size_t j = 0; j < 10; j++) for(size_t k = 0; k < 8; k++, l++) { memory[i][j][k] = l; - memoryres[i][j][k] = 0.0; + memoryres[i][j][k] = 0; } struct aml_layout *a, *ares; @@ -561,6 +615,7 @@ int main(int argc, char *argv[]) test_tiling_even_mixed(); test_tiling_pad_even(); test_tiling_pad_uneven(); + test_tiling_collapse(); return 0; } From af6da1aa02ab1c2ad59cfd5f367cc6ad343a3a40 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 4 Feb 2019 16:24:35 -0600 Subject: [PATCH 47/47] Bugfix and associated test. 
---
 src/tiling_nd_collapse.c | 4 ++--
 tests/tiling_nd.c        | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/tiling_nd_collapse.c b/src/tiling_nd_collapse.c
index c33d5a90..d169f0c9 100644
--- a/src/tiling_nd_collapse.c
+++ b/src/tiling_nd_collapse.c
@@ -338,7 +338,7 @@ aml_tiling_nd_collapse_row_dims(const struct aml_tiling_nd_data *l,
 		(const struct aml_tiling_nd_data_collapse *)l;
 	assert(d != NULL);
 	for(size_t i = 0; i < d->ndims; i++) {
-		if (d->dims[i] > 1) {
+		if (d->dims[d->ndims - i - 1] > 1) {
 			size_t *dim = va_arg(dims_ptrs, size_t*);
 			assert(dim != NULL);
 			*dim = d->dims[d->ndims - i - 1];
@@ -355,7 +355,7 @@ aml_tiling_nd_collapse_row_adims(const struct aml_tiling_nd_data *l,
 		(const struct aml_tiling_nd_data_collapse *)l;
 	assert(d != NULL);
 	for(size_t i = 0, j = 0; i < d->ndims; i++)
-		if (d->dims[i] > 1)
+		if (d->dims[d->ndims - i - 1] > 1)
 			dims[j++] = d->dims[d->ndims - i - 1];
 	return 0;
 }
diff --git a/tests/tiling_nd.c b/tests/tiling_nd.c
index e8f1d4d8..6ad02e05 100644
--- a/tests/tiling_nd.c
+++ b/tests/tiling_nd.c
@@ -37,6 +37,15 @@ void test_tiling_collapse(void)
 	aml_tiling_nd_collapse_acreate(&tres, AML_TYPE_TILING_ROW_ORDER,
 				       ares, 4, dims_tile_row);
 
+	assert(aml_tiling_nd_ndims(t) == 2);
+	assert(aml_tiling_nd_ndims(tres) == 2);
+
+	size_t dims[2] = { 0, 0}; /* size_t elements: size by sizeof(dims) */
+	aml_tiling_nd_adims(t, dims);
+	assert(memcmp(dims, expected_dims_col, sizeof(dims)) == 0);
+	memset(dims, 0, sizeof(dims));
+	aml_tiling_nd_adims(tres, dims);
+	assert(memcmp(dims, expected_dims_row, sizeof(dims)) == 0);
 
 	for(size_t i = 0; i < expected_dims_col[1]; i++)
 		for(size_t j = 0; j < expected_dims_col[0]; j++) {