Skip to content

Commit

Permalink
Merge pull request #48 from JKLiang9714/huawei-dev-ljk
Browse files Browse the repository at this point in the history
add more ucg features
  • Loading branch information
ChenQiangFYQ authored Sep 24, 2021
2 parents fc4f5b7 + 66acebc commit c516cd5
Show file tree
Hide file tree
Showing 19 changed files with 1,620 additions and 193 deletions.
19 changes: 9 additions & 10 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ $ make install
```

NOTE: Compiling support for various networks or other specific hardware may
require additional command line flags when running "configure".
require additional command line flags when running configure.

### Developer Builds

Expand All @@ -69,17 +69,17 @@ penalty at run-time because of extra debugging code.
$ make -C test/gtest test
```

### Building RPM package
### Build RPM package
```bash
$ contrib/buildrpm.sh -s -b
```

### Building DEB package
### Build DEB package
```bash
$ dpkg-buildpackage -us -uc
```

### Building Doxygen documentation
### Build Doxygen documentation
```bash
$ make docs
```
Expand All @@ -92,18 +92,18 @@ $ make docs

### UCX Performance Test

Start the server:
Start server:

```sh
$ ./src/tools/perf/ucx_perftest -c 0
```

Connect to the client:
Connect client:

```sh
$ ./src/tools/perf/ucx_perftest <server-hostname> -t tag_lat -c 1
```
Note: The -c flag sets CPU affinity. To run both commands on the same host, set the affinity to different CPU cores.
Note: the `-c` flag sets CPU affinity. If you run both commands on the same host, make sure you set the affinity to different CPU cores.

## Our Community

Expand Down Expand Up @@ -185,7 +185,7 @@ To reference the UCX website:

## Huawei Optimization Introduction

Based on performance consideration, UCX **DOES NOT** provide the functionalities related to transmission security.
For performance reasons, UCX **DOES NOT** provide functionality related to transmission security.

There are three optimized collective operations:

Expand All @@ -203,7 +203,7 @@ New algorithms are as follows:
- Topo-aware Recursive + Binomial(intra)
- Topo-aware Recursive + K-nomial(intra)

Select a specific algorithm with parameters which are described in the tables below.
Select a specific algorithm using the parameters shown in the tables below.

Bcast:

Expand Down Expand Up @@ -238,4 +238,3 @@ Barrier:
| 5 | Topo-aware Recursive + K-nomial (intra)(Socket) |
| 6 | Topo-aware K-nomial(Node) |
| 7 | Topo-aware K-nomial(Socket) |

1 change: 1 addition & 0 deletions config/m4/gtest.m4
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ AC_ARG_ENABLE([gtest],
AC_MSG_CHECKING([for using Google C++ Testing Framework])
AC_MSG_RESULT([$enable_gtest])
AM_CONDITIONAL([HAVE_GTEST],[test "x$enable_gtest" = "xyes"])
AC_DEFINE([ENABLE_GTEST],[1],[enable gtest])
])
1 change: 0 additions & 1 deletion contrib/upload_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ git remote show origin &>/dev/null || git remote add origin https://github.com/o
git fetch --all
git checkout -t origin/master -f
git pull
git submodule update --init --recursive --remote
cp -f ../docs/doxygen-doc/ucx.pdf ./
git commit ucx.pdf -m "update ucx.pdf for $rev"
git push
3 changes: 2 additions & 1 deletion src/uct/ib/ud/base/ud_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ uct_ud_ep_is_last_pending_elem(uct_ud_ep_t *ep, ucs_arbiter_elem_t *elem)
/* only two elements are in the group (the 1st element is the
* current one, the 2nd (or the last) element is the control one) */
(ucs_arbiter_group_tail(&ep->tx.pending.group) == &ep->tx.pending.elem)));

}

static ucs_arbiter_cb_result_t
Expand Down Expand Up @@ -1085,6 +1085,7 @@ static uct_ud_send_skb_t *uct_ud_ep_prepare_crep(uct_ud_ep_t *ep)
crep = (uct_ud_ctl_hdr_t *)(neth + 1);

crep->type = UCT_UD_PACKET_CREP;
memset(&crep->conn_rep, 0, sizeof(crep->conn_rep));
crep->conn_rep.src_ep_id = ep->ep_id;

uct_ud_peer_name(ucs_unaligned_ptr(&crep->peer));
Expand Down
3 changes: 2 additions & 1 deletion test/gtest/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ gtest_SOURCES += \
ucg/test_op.cc \
ucg/test_ucg_context.cc \
ucg/test_builtin.cc \
ucg/ucg_algo_decision_test.cc \
ucg/test_cb.cc
endif

Expand Down Expand Up @@ -223,7 +224,7 @@ endif
if HAVE_TL_RC
gtest_SOURCES += \
uct/ib/test_rc.cc
endif
endif
if HAVE_TL_DC
gtest_SOURCES += \
uct/ib/test_dc.cc
Expand Down
145 changes: 60 additions & 85 deletions test/gtest/ucg/test_builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
* See file LICENSE for terms.
*/

extern "C" {

#include <ucg/builtin/plan/builtin_plan.h>
}
#include "ucg_test.h"
#include "ucg_plan_test.h"


ucg_plan_test::ucg_plan_test()
{
m_builtin_ctx = NULL;
Expand Down Expand Up @@ -52,44 +50,44 @@ ucg_plan_test::~ucg_plan_test()
m_all_rank_infos.clear();
}

/*
 * Prototypes for builtin-planner helpers that the tests in this file call
 * directly (ucg_builtin_set_algo, ucg_builtin_print and
 * ucg_builtin_set_phase_thresholds). They are declared with C linkage.
 * NOTE(review): STATIC_GTEST presumably expands to nothing in gtest builds so
 * that these otherwise file-local functions become linkable from the tests -
 * confirm against the macro's definition.
 */
extern "C" {
STATIC_GTEST void ucg_builtin_set_algo(coll_type_t ctype, int algo_id, ucg_builtin_algo_t *algo);
STATIC_GTEST void ucg_builtin_print(ucg_plan_t *plan, const ucg_collective_params_t *coll_params);
STATIC_GTEST void ucg_builtin_set_phase_thresholds(ucg_builtin_group_ctx_t *ctx,
ucg_builtin_plan_phase_t *phase);
}

TEST(ucg_plan_test, ucg_plan_1_test) {
ucg_plan_test example(4, 8, 0);
size_t msg_size = UCG_GROUP_MED_MSG_SIZE - 1024;

ucg_plan_t *plan = NULL;
ucs_status_t ret = ucg_builtin_component.create(&ucg_builtin_component, example.m_ucg_worker,
example.m_group, 23, 0, NULL, example.m_group_params);
EXPECT_EQ(UCS_OK, ret);
ret = ucg_builtin_component.plan(&ucg_builtin_component, &example.m_coll_type, msg_size,
example.m_group, example.m_coll_params, &plan);
ret = ucg_builtin_component.plan(example.m_group, 2, example.m_coll_params, &plan);
EXPECT_EQ(UCS_OK, ret);

}

TEST(ucg_plan_test, ucg_plan_2_test) {
ucg_plan_test example(4, 8, 0);
size_t msg_size = UCG_GROUP_MED_MSG_SIZE + 1024;

ucg_plan_t *plan = NULL;
ucs_status_t ret = ucg_builtin_component.create(&ucg_builtin_component, example.m_ucg_worker,
example.m_group, 23, 0, NULL, example.m_group_params);
EXPECT_EQ(UCS_OK, ret);
ret = ucg_builtin_component.plan(&ucg_builtin_component, &example.m_coll_type, msg_size,
example.m_group, example.m_coll_params, &plan);
ret = ucg_builtin_component.plan(example.m_group, 2, example.m_coll_params, &plan);
EXPECT_EQ(UCS_OK, ret);
}

TEST(ucg_plan_test, ucg_plan_3_test) {
ucg_plan_test example(4, 8, 0);
size_t msg_size = UCG_GROUP_MED_MSG_SIZE - 1024;

ucg_plan_t *plan = NULL;
ucs_status_t ret = ucg_builtin_component.create(&ucg_builtin_component, example.m_ucg_worker,
example.m_group, 23, 0, NULL, example.m_group_params);
EXPECT_EQ(UCS_OK, ret);
ret = ucg_builtin_component.plan(&ucg_builtin_component, &example.m_coll_type, msg_size,
example.m_group, example.m_coll_params, &plan);
ret = ucg_builtin_component.plan(example.m_group, 2, example.m_coll_params, &plan);
EXPECT_EQ(UCS_OK, ret);
}
/*
Expand All @@ -107,56 +105,46 @@ TEST(ucg_plan_test, ucg_plan_4_test) {
}
*/
TEST(ucg_plan_test, algorithm_selection) {
ucs_status_t ret;
unsigned idx;
for (idx = 0; idx < UCG_ALGORITHM_ALLREDUCE_LAST; idx++) {
ret = ucg_builtin_allreduce_algo_switch((enum ucg_builtin_allreduce_algorithm) idx, &ucg_algo);
ASSERT_EQ(UCS_OK, ret);
ucg_builtin_allreduce_algo_switch((enum ucg_builtin_allreduce_algorithm) idx, &ucg_algo);
}

for (idx = 0; idx < UCG_ALGORITHM_BARRIER_LAST; idx++) {
ret = ucg_builtin_barrier_algo_switch((enum ucg_builtin_barrier_algorithm) idx, &ucg_algo);
ASSERT_EQ(UCS_OK, ret);
ucg_builtin_barrier_algo_switch((enum ucg_builtin_barrier_algorithm) idx, &ucg_algo);
}

for (idx = 0; idx < UCG_ALGORITHM_BCAST_LAST; idx++) {
ret = ucg_builtin_bcast_algo_switch((enum ucg_builtin_bcast_algorithm) idx, &ucg_algo);
ASSERT_EQ(UCS_OK, ret);
ucg_builtin_bcast_algo_switch((enum ucg_builtin_bcast_algorithm) idx, &ucg_algo);
}

for (idx = 0; idx < UCG_ALGORITHM_ALLTOALLV_LAST; idx++) {
ucg_builtin_alltoallv_algo_switch((enum ucg_builtin_alltoallv_algorithm) idx, &ucg_algo);
}
}

TEST(ucg_plan_test, topo_level) {
ucs_status_t ret;
ucg_algo.topo_level = UCG_GROUP_HIERARCHY_LEVEL_NODE;
enum ucg_group_member_distance domain_distance = UCG_GROUP_MEMBER_DISTANCE_SELF;
ret = choose_distance_from_topo_aware_level(&domain_distance);
ASSERT_EQ(UCS_OK, ret);
choose_distance_from_topo_aware_level(&domain_distance);
ucg_algo.topo_level = UCG_GROUP_HIERARCHY_LEVEL_SOCKET;
ret = choose_distance_from_topo_aware_level(&domain_distance);
ASSERT_EQ(UCS_OK, ret);
choose_distance_from_topo_aware_level(&domain_distance);
ucg_algo.topo_level = UCG_GROUP_HIERARCHY_LEVEL_L3CACHE;
ret = choose_distance_from_topo_aware_level(&domain_distance);
ASSERT_EQ(UCS_OK, ret);
choose_distance_from_topo_aware_level(&domain_distance);
}

TEST(ucg_plan_test, check_continus_number) {
ucg_group_params_t group_params;

group_params.member_count = 4;
group_params.topo_map = (char **)malloc(sizeof(char *) * group_params.member_count);
group_params.topo_map[0] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_SELF, UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_NET, UCG_GROUP_MEMBER_DISTANCE_NET};
group_params.topo_map[1] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_SELF, UCG_GROUP_MEMBER_DISTANCE_NET, UCG_GROUP_MEMBER_DISTANCE_NET};
group_params.topo_map[2] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_NET, UCG_GROUP_MEMBER_DISTANCE_NET, UCG_GROUP_MEMBER_DISTANCE_SELF, UCG_GROUP_MEMBER_DISTANCE_HOST};
group_params.topo_map[3] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_NET, UCG_GROUP_MEMBER_DISTANCE_NET, UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_SELF};
group_params.topo_args.nrank_uncontinue = 0;
group_params.topo_args.srank_uncontinue = 1;

unsigned discount = 0;
ucs_status_t status = ucg_builtin_check_continuous_number(&group_params, UCG_GROUP_MEMBER_DISTANCE_HOST, &discount);
ASSERT_EQ(UCS_OK, status);
ASSERT_EQ(0u, discount);

group_params.topo_map[0] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_SELF, UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_SOCKET};
group_params.topo_map[1] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_SELF, UCG_GROUP_MEMBER_DISTANCE_SOCKET, UCG_GROUP_MEMBER_DISTANCE_HOST};
group_params.topo_map[2] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_SOCKET, UCG_GROUP_MEMBER_DISTANCE_SELF, UCG_GROUP_MEMBER_DISTANCE_HOST};
group_params.topo_map[3] = new char[4] {UCG_GROUP_MEMBER_DISTANCE_SOCKET, UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_HOST, UCG_GROUP_MEMBER_DISTANCE_SELF};
discount = 0;
status = ucg_builtin_check_continuous_number(&group_params, UCG_GROUP_MEMBER_DISTANCE_SOCKET, &discount);
ASSERT_EQ(UCS_OK, status);
Expand Down Expand Up @@ -207,60 +195,47 @@ TEST(ucg_plan_test, choose_type) {
}
}

/* TODO: add verification to below functions */
/*
TEST(ucg_plan_test, plan_decision_in_discontinuous_case) {
ucg_plan_test example(2, 2, 0);
unsigned op_num = 3;
enum ucg_collective_modifiers modifiers[op_num] = { (enum ucg_collective_modifiers ) (UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST | UCG_GROUP_COLLECTIVE_MODIFIER_SINGLE_SOURCE), \
(enum ucg_collective_modifiers) (UCG_GROUP_COLLECTIVE_MODIFIER_AGGREGATE | UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST | UCG_GROUP_COLLECTIVE_MODIFIER_BARRIER), \
(enum ucg_collective_modifiers) (UCG_GROUP_COLLECTIVE_MODIFIER_AGGREGATE | UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST) };
unsigned size_num = 2;
size_t msg_size[size_num] = {UCG_GROUP_MED_MSG_SIZE - 10, UCG_GROUP_MED_MSG_SIZE + 10};
for (unsigned i = 0; i < op_num; i++) {
for (unsigned j = 0; j < size_num; j++) {
ucg_builtin_plan_decision_in_discontinuous_case(msg_size[j], example.m_group_params, modifiers[i], example.m_coll_params);
}
/*
 * Smoke test: calls ucg_builtin_set_algo() with algorithm id 0 for each of the
 * barrier, bcast, allreduce and alltoallv collective types, writing into the
 * shared ucg_algo object. No result is asserted; the test only checks that the
 * calls complete.
 */
TEST(ucg_plan_test, set_algo) {
    const coll_type_t ctype[] = {COLL_TYPE_BARRIER, COLL_TYPE_BCAST, COLL_TYPE_ALLREDUCE, COLL_TYPE_ALLTOALLV};
    /* Range-for avoids comparing a signed index with the unsigned sizeof quotient. */
    for (coll_type_t type : ctype) {
        ucg_builtin_set_algo(type, 0, &ucg_algo);
    }
}
*/
TEST(ucg_plan_test, plan_decision_fixed) {
ucg_plan_test example(2, 2, 0);
unsigned op_num = 3;
enum ucg_collective_modifiers modifiers[op_num] = { (enum ucg_collective_modifiers ) (UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST | UCG_GROUP_COLLECTIVE_MODIFIER_SINGLE_SOURCE), \
(enum ucg_collective_modifiers) (UCG_GROUP_COLLECTIVE_MODIFIER_AGGREGATE | UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST | UCG_GROUP_COLLECTIVE_MODIFIER_BARRIER), \
(enum ucg_collective_modifiers) (UCG_GROUP_COLLECTIVE_MODIFIER_AGGREGATE | UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST) };
unsigned size_num = 2;
size_t msg_size[size_num] = {UCG_GROUP_MED_MSG_SIZE - 10, UCG_GROUP_MED_MSG_SIZE + 10};
unsigned data_num = 2;
unsigned large_data[data_num] = {100, 10000};
example.m_coll_params->send.dt_len = 200;
enum ucg_builtin_bcast_algorithm bcast_algo_decision;
enum ucg_builtin_allreduce_algorithm allreduce_algo_decision;
enum ucg_builtin_barrier_algorithm barrier_algo_decision;
for (unsigned i = 0; i < op_num; i++) {
for (unsigned j = 0; j < size_num; j++) {
for (unsigned k = 0; k < data_num; k++) {
plan_decision_fixed(msg_size[j], example.m_group_params, modifiers[i], example.m_coll_params, large_data[k], 0, &bcast_algo_decision, &allreduce_algo_decision, &barrier_algo_decision);
}
}
}

/*
 * Smoke test: calls ucg_builtin_print() on a plan whose planner is the builtin
 * component, passing NULL collective parameters. No result is asserted.
 */
TEST(ucg_plan_test, builtin_plan) {
    /* Value-initialize so that fields other than 'planner' are zeroed rather
     * than indeterminate. */
    ucg_plan_t *plan = new ucg_plan_t();
    plan->planner = &ucg_builtin_component;
    ucg_collective_params_t *coll_params = NULL;
    ucg_builtin_print(plan, coll_params);
    /* NOTE(review): assumes ucg_builtin_print() does not take ownership of
     * 'plan' - confirm; without this delete the allocation leaks. */
    delete plan;
}

TEST(ucg_plan_test, plan_chooose_ops) {
ucg_plan_test example(2, 2, 0);
unsigned op_num = 3;
enum ucg_collective_modifiers modifiers[op_num] = { (enum ucg_collective_modifiers ) (UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST | UCG_GROUP_COLLECTIVE_MODIFIER_SINGLE_SOURCE), \
(enum ucg_collective_modifiers) (UCG_GROUP_COLLECTIVE_MODIFIER_AGGREGATE | UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST | UCG_GROUP_COLLECTIVE_MODIFIER_BARRIER), \
(enum ucg_collective_modifiers) (UCG_GROUP_COLLECTIVE_MODIFIER_AGGREGATE | UCG_GROUP_COLLECTIVE_MODIFIER_BROADCAST) };
TEST(ucg_plan_test, set_phase_thresholds) {
ucg_plan_test example(4, 8, 0);
ucg_builtin_plan_phase_t plan_phase;

for (unsigned i = 0; i < op_num; i++) {
ucg_builtin_plan_choose_ops(example.m_planc, modifiers[i]);
}
}
uct_iface_attr_t *ep_attr = new uct_iface_attr_t();
plan_phase.ep_attr = ep_attr;

uct_md_attr_t *md_attr = new uct_md_attr_t();
md_attr->cap.max_reg = 8128;
plan_phase.md_attr = md_attr;

plan_phase.send_thresh.max_short_one = 31;
plan_phase.send_thresh.max_short_max = 63;
plan_phase.send_thresh.max_bcopy_one = 127;
plan_phase.send_thresh.max_bcopy_max = 255;
plan_phase.send_thresh.max_zcopy_one = 1023;
plan_phase.send_thresh.md_attr_cap_max_reg = 8127;

TEST(ucg_plan_test, test_algorithm_decision) {
ucg_plan_test example(2, 2, 0);
ucs_status_t ret = ucg_builtin_algorithm_decision(&(example.m_coll_type), 1024, example.m_group_params, example.m_coll_params, example.m_planc);
ASSERT_EQ(UCS_OK, ret);
plan_phase.recv_thresh.max_short_one = plan_phase.send_thresh.max_short_one;
plan_phase.recv_thresh.max_short_max = plan_phase.send_thresh.max_short_max;
plan_phase.recv_thresh.max_bcopy_one = plan_phase.send_thresh.max_bcopy_one;
plan_phase.recv_thresh.max_bcopy_max = plan_phase.send_thresh.max_bcopy_max;
plan_phase.recv_thresh.max_zcopy_one = plan_phase.send_thresh.max_zcopy_one;
plan_phase.recv_thresh.md_attr_cap_max_reg = 8127;

ucg_builtin_set_phase_thresholds(example.m_builtin_ctx, &plan_phase);

delete ep_attr;
}

Loading

0 comments on commit c516cd5

Please sign in to comment.