From 805d708791368b2babe4b794ea73c229e1498953 Mon Sep 17 00:00:00 2001 From: Zihao Date: Mon, 19 Sep 2022 13:29:53 -0700 Subject: [PATCH] c++ hyb part test passed --- CMakeLists.txt | 6 +- python/tvm/sparse/__init__.py | 2 +- python/tvm/sparse/format_rewrite.py | 6 + src/sparse/format.cc | 119 +++++++++++++++--- tests/python/sparsetir/test_format_rewrite.py | 115 ++++++++++++++++- 5 files changed, 222 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b0fe713d..5e2d85db5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -277,7 +277,6 @@ tvm_file_glob(GLOB_RECURSE COMPILER_SRCS src/parser/*.cc src/printer/*.cc src/support/*.cc - src/sparse/*.cc ) tvm_file_glob(GLOB CODEGEN_SRCS @@ -287,6 +286,9 @@ tvm_file_glob(GLOB CODEGEN_SRCS list(APPEND COMPILER_SRCS ${CODEGEN_SRCS}) +tvm_file_glob(GLOB SPARSE_SRCS + src/sparse/*.cc +) tvm_file_glob(GLOB_RECURSE RELAY_OP_SRCS src/relay/op/*.cc ) @@ -310,6 +312,7 @@ list(APPEND COMPILER_SRCS ${RELAY_PASS_SRCS}) list(APPEND COMPILER_SRCS ${RELAY_BACKEND_SRCS}) list(APPEND COMPILER_SRCS ${RELAY_IR_SRCS}) list(APPEND COMPILER_SRCS ${RELAY_QNN_SRCS}) +list(APPEND COMPILER_SRCS ${SPARSE_SRCS}) tvm_file_glob(GLOB DATATYPE_SRCS src/target/datatype/*.cc) list(APPEND COMPILER_SRCS ${DATATYPE_SRCS}) @@ -320,6 +323,7 @@ tvm_file_glob(GLOB RUNTIME_SRCS src/runtime/vm/*.cc ) + if(BUILD_FOR_HEXAGON) if(NOT BUILD_STATIC_RUNTIME) # Allow undefined symbols (there will be some from libc). diff --git a/python/tvm/sparse/__init__.py b/python/tvm/sparse/__init__.py index 69a7c8c1d..754e07a37 100644 --- a/python/tvm/sparse/__init__.py +++ b/python/tvm/sparse/__init__.py @@ -18,5 +18,5 @@ """Python-interface for Sparse-TIR""" from .lower import lower_sparse_iter, lower_sparse_buffer -from .format_rewrite import FormatRewriteRule +from .format_rewrite import FormatRewriteRule, column_part_hyb from .specialize import specialize_buffer diff --git a/python/tvm/sparse/format_rewrite.py b/python/tvm/sparse/format_rewrite.py index 565fa0f3a..e90400e15 100644 --- a/python/tvm/sparse/format_rewrite.py +++ b/python/tvm/sparse/format_rewrite.py @@ -82,3 +82,9 @@ def __init__( idx_map, inv_idx_map, ) # type: ignore + + +def column_part_hyb(num_rows, num_cols, indptr_nd, indices_nd, num_col_parts, buckets): + return _ffi_api.ColumnPartHyb( + num_rows, num_cols, indptr_nd, indices_nd, num_col_parts, buckets # type: ignore + ) diff --git a/src/sparse/format.cc b/src/sparse/format.cc index 84b2702a7..496dc57a4 100644 --- a/src/sparse/format.cc +++ b/src/sparse/format.cc @@ -17,51 +17,130 @@ * under the License. */ - /*! * \file format.cc * \brief format conversion routine. */ -#include +#include #include +#include #include -#include -#include + namespace tvm { using runtime::NDArray; +Array>> ColumnPartHyb(int num_rows, int num_cols, NDArray indptr, + NDArray indices, int num_col_parts, + Array buckets) { + int partition_size = (num_cols + num_col_parts - 1) / num_col_parts; + int num_bkts = buckets.size(); + std::vector buckets_vec; + for (const Integer& bucket_size : buckets) { + buckets_vec.push_back(bucket_size->value); + } -Array>> ColumnPartHyb( - int num_rows, - int num_cols, - NDArray indptr, - NDArray indices, - int column_parts, - Array buckets -) { - Array> rst_row_indices; - Array> rst_col_indices; - int partition_size = (num_cols + column_parts - 1) / column_parts; - - assert(indptr->dtype.bits == 32); - assert(indices->dtype.bits == 32); + CHECK_EQ(indptr->dtype.bits, 32) << "Only support int32 index data type, got " + << int(indptr->dtype.bits) << " bits for indptr."; + CHECK_EQ(indices->dtype.bits, 32) << "Only support int32 index data type, got " + << int(indices->dtype.bits) << " bits for indices."; + CHECK_EQ(indptr->device.device_type, kDLCPU) << "Only support ColumnPartHyb conversion on CPU."; + CHECK_EQ(indices->device.device_type, kDLCPU) << "Only support ColumnPartHyb conversion on CPU."; int* indptr_data = static_cast(indptr->data); int* indices_data = static_cast(indices->data); + std::vector> degree_counter(num_col_parts); + for (int i = 0; i < num_rows; ++i) { + for (int j = indptr_data[i]; j < indptr_data[i + 1]; ++j) { + int row_id = i; + int col_id = indices_data[j]; + int part_id = col_id / partition_size; + degree_counter[part_id].insert(row_id); + } + } + + /* (num_parts, num_buckets, ...) */ + std::vector>> row_indices(num_col_parts); + std::vector>> col_indices(num_col_parts); + // init row_indices and col_indices + for (int part_id = 0; part_id < num_col_parts; ++part_id) { + for (int bucket_id = 0; bucket_id < num_bkts; ++bucket_id) { + row_indices[part_id].push_back(std::vector()); + col_indices[part_id].push_back(std::vector()); + } + } for (int i = 0; i < num_rows; ++i) { for (int j = indptr_data[i]; j < indptr_data[i + 1]; ++j) { int row_id = i; int col_id = indices_data[j]; int part_id = col_id / partition_size; + int degree = degree_counter[part_id].count(row_id); + int bucket_id = std::upper_bound(buckets_vec.begin(), buckets_vec.end(), degree - 1) - + buckets_vec.begin(); + if (bucket_id == num_bkts) { + bucket_id--; + } + int bucket_size = buckets_vec[bucket_id]; + bool create_new_bucket = false; + int remainder = col_indices[part_id][bucket_id].size() % bucket_size; + if (remainder != 0) { + if (row_id != row_indices[part_id][bucket_id].back()) { + // padding + for (int k = remainder; k < bucket_size; ++k) { + col_indices[part_id][bucket_id].push_back(0); + } + create_new_bucket = true; + } + } else { + create_new_bucket = true; + } + if (create_new_bucket) { + ICHECK(col_indices[part_id][bucket_id].size() % bucket_size == 0) << "Invalid padding"; + row_indices[part_id][bucket_id].push_back(row_id); + } + col_indices[part_id][bucket_id].push_back(col_id); } } - return {rst_row_indices, rst_col_indices}; + // final padding and conversion to NDArray + Array> row_indices_nd; + Array> col_indices_nd; + for (int part_id = 0; part_id < num_col_parts; ++part_id) { + Array row_indices_part_local; + Array col_indices_part_local; + for (int bucket_id = 0; bucket_id < num_bkts; ++bucket_id) { + int bucket_size = buckets_vec[bucket_id]; + // padding + int remainder = col_indices[part_id][bucket_id].size() % bucket_size; + if (remainder != 0) { + for (int k = remainder; k < bucket_size; ++k) { + col_indices[part_id][bucket_id].push_back(0); + } + } + // conversion to NDArray + int nnz = row_indices[part_id][bucket_id].size(); + ICHECK(int(col_indices[part_id][bucket_id].size()) == nnz * bucket_size) << "Padding error."; + NDArray row_indices_bucket_local = NDArray::Empty({nnz}, {kDLInt, 32, 1}, {kDLCPU, 0}); + NDArray col_indices_bucket_local = + NDArray::Empty({nnz, bucket_size}, {kDLInt, 32, 1}, {kDLCPU, 0}); + row_indices_bucket_local.CopyFromBytes(row_indices[part_id][bucket_id].data(), + nnz * sizeof(int)); + col_indices_bucket_local.CopyFromBytes(col_indices[part_id][bucket_id].data(), + nnz * bucket_size * sizeof(int)); + row_indices_part_local.push_back(row_indices_bucket_local); + col_indices_part_local.push_back(col_indices_bucket_local); + } + row_indices_nd.push_back(row_indices_part_local); + col_indices_nd.push_back(col_indices_part_local); + } + + // convert to NDArray + + return {row_indices_nd, col_indices_nd}; } namespace sparse { - TVM_REGISTER_GLOBAL("tir.sparse.ColumnPartHyb").set_body_typed(ColumnPartHyb); +TVM_REGISTER_GLOBAL("tir.sparse.ColumnPartHyb").set_body_typed(ColumnPartHyb); } // namespace sparse } // namespace tvm \ No newline at end of file diff --git a/tests/python/sparsetir/test_format_rewrite.py b/tests/python/sparsetir/test_format_rewrite.py index 6df95f10a..d432dd8e2 100644 --- a/tests/python/sparsetir/test_format_rewrite.py +++ b/tests/python/sparsetir/test_format_rewrite.py @@ -15,8 +15,11 @@ # specific language governing permissions and limitations # under the License. +from ast import copy_location import tvm -from tvm.sparse import FormatRewriteRule +import dgl +import numpy as np +from tvm.sparse import FormatRewriteRule, column_part_hyb from sparse_tir_scripts import csrmm from sparse_tir_format_rewrite_scripts import ( bsr, @@ -121,7 +124,111 @@ def test_csrmm_padding_rewrite(): tvm.ir.assert_structural_equal(mod["main"], padding_rewrite_with_preprocess, True) +def scipy_column_part_hyb(g, column_part, bucket_sizes): + mat = g.adj(transpose=True, scipy_fmt="csr") + buckets = bucket_sizes * column_part + m = mat.shape[0] + n = mat.shape[1] + nnz = mat.nnz + per_column_part_size = (n + column_part - 1) // column_part + sub_mats = [ + mat[:, i * per_column_part_size : (i + 1) * per_column_part_size] + for i in range(column_part) + ] + + num_buckets = len(buckets) + ell_n = [] + + for partition in range(column_part): + sub_mat = sub_mats[partition] + in_degrees = sub_mat.indptr[1:] - sub_mat.indptr[:-1] + for i, bucket_size in enumerate(bucket_sizes[:-1]): + last_bucket_size = 0 if i == 0 else bucket_sizes[i - 1] + ell_n.append(int(((in_degrees > last_bucket_size) & (in_degrees <= bucket_size)).sum())) + sub_indegrees = in_degrees[in_degrees > bucket_sizes[-2]] + ell_n.append(int(((sub_indegrees + bucket_sizes[-1] - 1) // bucket_sizes[-1]).sum())) + + ell_rows = [] + ell_indices = [] + + for partition in range(column_part): + sub_mat = sub_mats[partition] + in_degrees = sub_mat.indptr[1:] - sub_mat.indptr[:-1] + + for i, bucket_size in enumerate(bucket_sizes[:-1]): + last_bucket_size = 0 if i == 0 else bucket_sizes[i - 1] + ell_rows.append( + ((in_degrees > last_bucket_size) & (in_degrees <= bucket_size)).nonzero()[0] + ) + ell_rows.append((in_degrees > bucket_sizes[-2]).nonzero()[0]) + + for i, bucket_size in enumerate(bucket_sizes[:-1]): + indices = np.zeros( + (ell_n[partition * len(bucket_sizes) + i], bucket_size), dtype=np.int32 + ) + for j, row_id in enumerate(ell_rows[partition * len(bucket_sizes) + i]): + row = sub_mat[row_id] + indices[j, : row.nnz] = row.indices + partition * per_column_part_size + ell_indices.append(indices) + + # split rows for the last bucket + indices = np.zeros( + (ell_n[(partition + 1) * len(bucket_sizes) - 1], bucket_sizes[-1]), dtype=np.int32 + ) + new_rows = np.zeros((ell_n[(partition + 1) * len(bucket_sizes) - 1],), dtype=np.int32) + bucket_size = bucket_sizes[-1] + i = 0 + for row_id in ell_rows[-1]: + row = sub_mat[row_id] + for start_offset in range(0, row.nnz, bucket_size): + if start_offset + bucket_size >= row.nnz: + # last bucket + indices[i, : row.nnz - start_offset] = ( + row.indices[start_offset:] + partition * per_column_part_size + ) + else: + indices[i] = ( + row.indices[start_offset : start_offset + bucket_size] + + partition * per_column_part_size + ) + new_rows[i] = row_id + i += 1 + + ell_indices.append(indices) + ell_rows[-1] = new_rows + + return ell_rows, ell_indices + + +def test_column_part_hyb(): + g = dgl.rand_graph(1000, 10000).int() + column_parts = 4 + buckets = [1, 2, 4, 8] + indptr, indices, _ = g.adj_sparse("csc") + indptr_nd = tvm.nd.array(indptr.numpy(), device=tvm.cpu()) + indices_nd = tvm.nd.array(indices.numpy(), device=tvm.cpu()) + # built-in c++ funcion + row_indices, col_indices = column_part_hyb( + g.num_dst_nodes(), g.num_src_nodes(), indptr_nd, indices_nd, column_parts, buckets + ) + # compute indices with scipy + row_indices_scipy, col_indices_scipy = scipy_column_part_hyb(g, column_parts, buckets) + + for part_id in range(column_parts): + for bucket_id, _ in enumerate(buckets): + assert np.array_equal( + row_indices[part_id][bucket_id].numpy(), + row_indices_scipy[part_id * len(buckets) + bucket_id], + ) + assert np.array_equal( + col_indices[part_id][bucket_id].numpy(), + col_indices_scipy[part_id * len(buckets) + bucket_id], + ) + + if __name__ == "__main__": - test_csrmm_bsr_rewrite() - test_csrmm_ell_rewrite() - test_csrmm_padding_rewrite() + # test_csrmm_bsr_rewrite() + # test_csrmm_ell_rewrite() + # test_csrmm_padding_rewrite() + test_column_part_hyb() + # test_condense()