diff --git a/dlib/cuda/tensor_tools.h b/dlib/cuda/tensor_tools.h
index 13a98bf7f0..17649603d9 100644
--- a/dlib/cuda/tensor_tools.h
+++ b/dlib/cuda/tensor_tools.h
@@ -172,49 +172,49 @@ namespace dlib { namespace tt
         requires
             - dest does not alias the memory of lhs or rhs
             - The dimensions of lhs and rhs must be compatible for matrix multiplication.
-              The specific requirements depend on the mode:
-
-              For CHANNEL_WISE mode (default):
-                - Let L == trans_lhs ? trans(mat(lhs)) : mat(lhs)
-                - Let R == trans_rhs ? trans(mat(rhs)) : mat(rhs)
-                - Let D == mat(dest)
-                - D.nr() == L.nr() && D.nc() == R.nc()
-                  (i.e. dest must be preallocated and have the correct output dimensions)
-                - L.nc() == R.nr()
-
-              For PLANE_WISE mode:
-                - lhs.num_samples() == rhs.num_samples() && lhs.k() == rhs.k()
-                - If !trans_lhs && !trans_rhs:
-                    lhs.nc() == rhs.nr()
-                    dest.nr() == lhs.nr() && dest.nc() == rhs.nc()
-                - If trans_lhs && !trans_rhs:
-                    lhs.nr() == rhs.nr()
-                    dest.nr() == lhs.nc() && dest.nc() == rhs.nc()
-                - If !trans_lhs && trans_rhs:
-                    lhs.nc() == rhs.nc()
-                    dest.nr() == lhs.nr() && dest.nc() == rhs.nr()
-                - If trans_lhs && trans_rhs:
-                    lhs.nr() == rhs.nc()
-                    dest.nr() == lhs.nc() && dest.nc() == rhs.nr()
+              The specific requirements depend on the mode:
+
+              For CHANNEL_WISE mode (default):
+                - Let L == trans_lhs ? trans(mat(lhs)) : mat(lhs)
+                - Let R == trans_rhs ? trans(mat(rhs)) : mat(rhs)
+                - Let D == mat(dest)
+                - D.nr() == L.nr() && D.nc() == R.nc()
+                  (i.e. dest must be preallocated and have the correct output dimensions)
+                - L.nc() == R.nr()
+
+              For PLANE_WISE mode:
+                - lhs.num_samples() == rhs.num_samples() && lhs.k() == rhs.k()
+                - If !trans_lhs && !trans_rhs:
+                    lhs.nc() == rhs.nr()
+                    dest.nr() == lhs.nr() && dest.nc() == rhs.nc()
+                - If trans_lhs && !trans_rhs:
+                    lhs.nr() == rhs.nr()
+                    dest.nr() == lhs.nc() && dest.nc() == rhs.nc()
+                - If !trans_lhs && trans_rhs:
+                    lhs.nc() == rhs.nc()
+                    dest.nr() == lhs.nr() && dest.nc() == rhs.nr()
+                - If trans_lhs && trans_rhs:
+                    lhs.nr() == rhs.nc()
+                    dest.nr() == lhs.nc() && dest.nc() == rhs.nr()
         ensures
             - Performs matrix multiplication based on the specified mode:
-              For CHANNEL_WISE mode:
-                - performs: dest = alpha*L*R + beta*mat(dest)
-                  Where L, R, and D are as defined above.
-
-              For PLANE_WISE mode:
-                - Performs matrix multiplication for each corresponding 2D plane (nr x nc)
-                  in lhs and rhs across all samples and channels.
-                - The operation is equivalent to performing the following for each sample
-                  and channel:
-                    dest[s][k] = alpha * (lhs[s][k] * rhs[s][k]) + beta * dest[s][k]
-                  Where [s][k] represents the 2D plane for sample s and channel k.
+              For CHANNEL_WISE mode:
+                - performs: dest = alpha*L*R + beta*mat(dest)
+                  where L, R, and D are as defined above.
+
+              For PLANE_WISE mode:
+                - Performs matrix multiplication for each corresponding 2D plane (nr x nc)
+                  in lhs and rhs across all samples and channels.
+                - The operation is equivalent to performing the following for each sample
+                  and channel:
+                    dest[s][k] = alpha * (lhs[s][k] * rhs[s][k]) + beta * dest[s][k]
+                  where [s][k] represents the 2D plane for sample s and channel k.
-              Note that the PLANE_WISE mode is particularly useful for operations like attention
-              mechanisms in neural networks, where you want to perform matrix multiplications
-              on 2D planes of 4D tensors while preserving the sample and channel dimensions.
+              Note that the PLANE_WISE mode is particularly useful for operations like attention
+              mechanisms in neural networks, where you want to perform matrix multiplications
+              on 2D planes of 4D tensors while preserving the sample and channel dimensions.
     !*/

// ----------------------------------------------------------------------------------------
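Not part of the patch above, but for readers of this spec: a minimal sketch of how the documented PLANE_WISE path might be called for an attention-style score computation. The tensor shapes, the use of tt::tensor_rand, and the assumption that the operation_mode enum is reachable from namespace dlib are illustrative choices, not taken from this diff.

```cpp
// Sketch only: per-plane Q * trans(K) using the PLANE_WISE mode documented above.
// Assumes operation_mode::PLANE_WISE is visible via namespace dlib.
#include <dlib/cuda/tensor_tools.h>
#include <cmath>

int main()
{
    using namespace dlib;

    // Q and K hold 2 samples x 4 heads; each 2D plane is 6 tokens x 8 dims.
    resizable_tensor Q(2, 4, 6, 8), K(2, 4, 6, 8);
    tt::tensor_rand rnd;
    rnd.fill_gaussian(Q, 0, 1);
    rnd.fill_gaussian(K, 0, 1);

    // dest must be preallocated: with !trans_lhs && trans_rhs the requires clause
    // gives scores.nr() == Q.nr() and scores.nc() == K.nr(), i.e. a 6x6 plane.
    resizable_tensor scores(2, 4, 6, 6);

    // scores[s][k] = scale * Q[s][k] * trans(K[s][k]) for every sample s and head k.
    const float scale = 1.0f / std::sqrt(8.0f);
    tt::gemm(0, scores, scale, Q, false, K, true, operation_mode::PLANE_WISE);

    return 0;
}
```

Leaving the last argument at its default keeps the pre-existing CHANNEL_WISE behavior, where mat() flattens each tensor to a single 2D matrix before the multiply.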