diff --git a/Code_Exercises/Exercise_16_Coalesced_Global_Memory/README.md b/Code_Exercises/Exercise_16_Coalesced_Global_Memory/README.md index 3149ffc1..c6741814 100644 --- a/Code_Exercises/Exercise_16_Coalesced_Global_Memory/README.md +++ b/Code_Exercises/Exercise_16_Coalesced_Global_Memory/README.md @@ -17,8 +17,8 @@ global memory access patterns in your kernel are coalesced. Consider two alternative ways to linearize the global id: ``` -auto rowMajorLinearId = (idx[1] * width) + idx[0]; // row-major -auto columnMajorLinearId = (idx[0] * height) + idx[1]; // column-major +auto rowMajorLinearId = sycl::id(globalId[0], globalId[1]); +auto columnMajorLinearId = sycl::id(globalId[1], globalId[0]); ``` Try using both of these and compare the execution time of each. diff --git a/Code_Exercises/Exercise_16_Coalesced_Global_Memory/solution.cpp b/Code_Exercises/Exercise_16_Coalesced_Global_Memory/solution.cpp index 19d0addf..7907770a 100644 --- a/Code_Exercises/Exercise_16_Coalesced_Global_Memory/solution.cpp +++ b/Code_Exercises/Exercise_16_Coalesced_Global_Memory/solution.cpp @@ -79,6 +79,12 @@ TEST_CASE("image_convolution_coalesced", "coalesced_global_memory_solution") { ndRange, [=](sycl::nd_item<2> item) { auto globalId = item.get_global_id(); + auto rowMajorLinearId = sycl::id(globalId[0], globalId[1]); + auto columnMajorLinearId = sycl::id(globalId[1], globalId[0]); + + // Set row major or column major + globalId = rowMajorLinearId; + auto channelsStride = sycl::range(1, channels); auto haloOffset = sycl::id(halo, halo); auto src = (globalId + haloOffset) * channelsStride; diff --git a/Lesson_Materials/Lecture_04_Handling_Errors/Lecture_04_Handling_Errors.pdf b/Lesson_Materials/Lecture_04_Handling_Errors/Lecture_04_Handling_Errors.pdf index 7e1359cd..1fe5e655 100644 Binary files a/Lesson_Materials/Lecture_04_Handling_Errors/Lecture_04_Handling_Errors.pdf and b/Lesson_Materials/Lecture_04_Handling_Errors/Lecture_04_Handling_Errors.pdf differ diff --git a/Lesson_Materials/Lecture_04_Handling_Errors/index.html b/Lesson_Materials/Lecture_04_Handling_Errors/index.html index dae3e44e..22d46e81 100644 --- a/Lesson_Materials/Lecture_04_Handling_Errors/index.html +++ b/Lesson_Materials/Lecture_04_Handling_Errors/index.html @@ -91,7 +91,7 @@ /* Synchronous code */ - cgh.single_task<add>(bufO.get_range(), [=](id<1> i) { + cgh.parallel_for<add>(bufO.get_range(), [=](id<1> i) { /* Asynchronous code */ @@ -166,7 +166,7 @@ auto inB = accessor{bufB, cgh, read_only}; auto out = accessor{bufO, cgh, write_only}; - cgh.single_task<add>(bufO.get_range(), [=](id<1> i) { + cgh.parallel_for<add>(bufO.get_range(), [=](id<1> i) { out[i] = inA[i] + inB[i]; }); }).wait(); @@ -190,7 +190,6 @@ std::vector<float> dA{ 7, 5, 16, 8 }, dB{ 8, 16, 5, 7 }, dO{ 0, 0, 0, 0 }; try{ queue gpuQueue(gpu_selector{}, async_handler{}); - buffer bufA{dA}; buffer bufB{dB}; buffer bufO{dO}; @@ -200,7 +199,7 @@ auto inB = accessor{bufB, cgh, read_only}; auto out = accessor{bufO, cgh, write_only}; - cgh.single_task<add>(bufO.get_range(), [=](id<1> i) { + cgh.parallel_for<add>(bufO.get_range(), [=](id<1> i) { out[i] = inA[i] + inB[i]; }); }).wait(); @@ -213,8 +212,7 @@