remove comments

rmjarvis · Nov 19, 2024 · a3726e6 · a3726e6
1 parent 2542ec6
commit a3726e6
Showing 1 changed file with 3 additions and 13 deletions.
diff --git a/src/basic_solver.cpp b/src/basic_solver.cpp
@@ -8,8 +8,9 @@
 
 namespace py = pybind11;
 
-// A custom deleter that finds the original address of the memory allocation directly
-// before the stored pointer and frees that using delete []
+// AlignedDeleted and allocateAlignedMemory
+// are borrowed from GalSim:
+// https://github.com/GalSim-developers/GalSim/blob/releases/2.6/src/Image.cpp#L127
 template <typename T>
 struct AlignedDeleter {
     void operator()(T* p) const { delete [] ((char**)p)[-1]; }
@@ -18,11 +19,6 @@ struct AlignedDeleter {
 template <typename T>
 std::shared_ptr<T> allocateAlignedMemory(int n)
 {
-    // This bit is based on the answers here:
-    // http://stackoverflow.com/questions/227897/how-to-allocate-aligned-memory-only-using-the-standard-library/227900
-    // The point of this is to get the _data pointer aligned to a 16 byte (128 bit) boundary.
-    // Arrays that are so aligned can use SSE operations and so can be much faster than
-    // non-aligned memroy.  FFTW in particular is faster if it gets aligned data.
     char* mem = new char[n * sizeof(T) + sizeof(char*) + 15];
     T* data = reinterpret_cast<T*>( (uintptr_t)(mem + sizeof(char*) + 15) & ~(size_t) 0x0F );
     ((char**)data)[-1] = mem;
@@ -45,12 +41,6 @@ auto _solve_direct_cpp_impl(
     std::vector<ssize_t> b_shape = b_buffer_info.shape;
     std::vector<ssize_t> A_shape = A_buffer_info.shape;
 
-    // Allocate and initialize container array for the A transpose A matrix.
-    // For some reason it is faster to allocate a numpy array and then map
-    // it to Eigen than allocating an Eigen array directly
-    // py::array_t<T, py::array::f_style> ATA(std::vector<ssize_t>({A_shape[1]*n, A_shape[1]*n}));
-    // py::buffer_info ATA_buffer = ATA.request();
-    // T __restrict * ATA_data = static_cast<T *>(ATA_buffer.ptr);
     auto ATA_data = allocateAlignedMemory<T>(A_shape[1]*N * A_shape[1]*N);
     // set it all to zero
     memset(ATA_data.get(), 0, A_shape[1]*n*A_shape[1]*n*sizeof(T));