Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extend edges method #2651

Merged
merged 1 commit into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion c/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
--------
UPCOMING
--------

**Features**

- Add the `tsk_treeseq_extend_edges` method that can compress a tree sequence
by extending edges into adjacent trees and thus creating unary nodes in those
trees (:user:`petrelharp`, :user:`hfr1tze`, :user:`avabamf`, :pr:`2651`).

--------------------
[1.1.2] - 2023-05-17
--------------------
Expand All @@ -24,7 +34,7 @@
(:user:`jeromekelleher`, :issue:`2662`, :pr:`2663`).

- Guarantee that unfiltered tables are not written to unnecessarily
during simplify (:user:`jeromekelleher` :pr:`2619`).
during simplify (:user:`jeromekelleher`, :pr:`2619`).

- Add `x_table_keep_rows` methods to provide efficient in-place table subsetting
(:user:`jeromekelleher`, :pr:`2700`).
Expand Down
162 changes: 162 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -8212,6 +8212,165 @@ test_split_edges_errors(void)
tsk_treeseq_free(&ts);
}

static void
test_extend_edges_simple(void)
{
int ret;
tsk_treeseq_t ts, ets;
const char *nodes = "1 0 -1 -1\n"
"1 0 -1 -1\n"
"0 2.0 -1 -1\n";
const char *edges = "0 10 2 0\n"
"0 10 2 1\n";
const char *sites = "0.0 0\n"
"1.0 0\n";
const char *mutations = "0 0 1 -1 0.5\n"
"1 1 1 -1 0.5\n";

tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, ets.tables, 0));
tsk_treeseq_free(&ts);

tsk_treeseq_free(&ets);
}

static void
test_extend_edges_errors(void)
{
int ret;
tsk_treeseq_t ts, ets;
const char *nodes = "1 0 -1 -1\n"
"1 0 -1 -1\n"
"0 2.0 -1 -1\n";
const char *edges = "0 10 2 0\n"
"0 10 2 1\n";
const char *sites = "0.0 0\n"
"1.0 0\n";
const char *mutations = "0 0 1 -1 0.5\n"
"1 1 1 -1 0.5\n";
const char *mutations_no_time = "0 0 1 -1\n"
"1 1 1 -1\n";
// left, right, node source, dest, time
const char *migrations = "0 10 0 0 1 0.5\n"
"0 10 0 1 0 1.5\n";

tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
ret = tsk_treeseq_extend_edges(&ts, -2, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EXTEND_EDGES_BAD_MAXITER);
tsk_treeseq_free(&ts);

tsk_treeseq_from_text(
&ts, 10, nodes, edges, migrations, sites, mutations, NULL, NULL, 0);
ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
tsk_treeseq_free(&ts);

tsk_treeseq_from_text(
&ts, 10, nodes, edges, NULL, sites, mutations_no_time, NULL, NULL, 0);
ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME);
tsk_treeseq_free(&ts);

tsk_treeseq_free(&ets);
}

static void
assert_equal_except_edges_and_mutation_nodes(
const tsk_treeseq_t *ts1, const tsk_treeseq_t *ts2)
{
tsk_table_collection_t t1, t2;
int ret;

ret = tsk_table_collection_copy(ts1->tables, &t1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_table_collection_copy(ts2->tables, &t2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

tsk_memset(t1.mutations.node, 0, t1.mutations.num_rows * sizeof(*t1.mutations.node));
tsk_memset(t2.mutations.node, 0, t2.mutations.num_rows * sizeof(*t2.mutations.node));

tsk_edge_table_clear(&t1.edges);
tsk_edge_table_clear(&t2.edges);

CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));

tsk_table_collection_free(&t1);
tsk_table_collection_free(&t2);
}

static void
test_extend_edges(void)
{
int ret, max_iter;
tsk_treeseq_t ts, ets;
/* 7 and 8 should be extended to the whole sequence

6 6 6 6
+-+-+ +-+-+ +-+-+ +-+-+
| | 7 | | 8 | |
| | ++-+ | | +-++ | |
4 5 4 | | 4 | 5 4 5
+++ +++ +++ | | | | +++ +++ +++
0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3
*/

const char *nodes = "1 0 -1 -1\n"
"1 0 -1 -1\n"
"1 0 -1 -1\n"
"1 0 -1 -1\n"
"0 1.0 -1 -1\n"
"0 1.0 -1 -1\n"
"0 3.0 -1 -1\n"
"0 2.0 -1 -1\n"
"0 2.0 -1 -1\n";
// l, r, p, c
const char *edges = "0 10 4 0\n"
"0 5 4 1\n"
"7 10 4 1\n"
"0 2 5 2\n"
"5 10 5 2\n"
"0 2 5 3\n"
"5 10 5 3\n"
"2 5 7 2\n"
"2 5 7 4\n"
"5 7 8 1\n"
"5 7 8 5\n"
"2 5 6 3\n"
"0 2 6 4\n"
"5 10 6 4\n"
"0 2 6 5\n"
"7 10 6 5\n"
"2 5 6 7\n"
"5 7 6 8\n";
const char *sites = "0.0 0\n"
"9.0 0\n";
const char *mutations = "0 4 1 -1 2.5\n"
"0 4 2 0 1.5\n"
"1 5 1 -1 2.5\n"
"1 5 2 2 1.5\n";

tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

for (max_iter = 1; max_iter < 10; max_iter++) {
ret = tsk_treeseq_extend_edges(&ts, max_iter, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_equal_except_edges_and_mutation_nodes(&ts, &ets);
CU_ASSERT_TRUE(ets.tables->edges.num_rows >= 12);
tsk_treeseq_free(&ets);
}

ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(ets.tables->nodes.num_rows, 9);
CU_ASSERT_EQUAL_FATAL(ets.tables->edges.num_rows, 12);
tsk_treeseq_free(&ets);

tsk_treeseq_free(&ts);
}

static void
test_init_take_ownership_no_edge_metadata(void)
{
Expand Down Expand Up @@ -8431,6 +8590,9 @@ main(int argc, char **argv)
{ "test_split_edges_no_populations", test_split_edges_no_populations },
{ "test_split_edges_populations", test_split_edges_populations },
{ "test_split_edges_errors", test_split_edges_errors },
{ "test_extend_edges_simple", test_extend_edges_simple },
{ "test_extend_edges_errors", test_extend_edges_errors },
{ "test_extend_edges", test_extend_edges },
{ "test_init_take_ownership_no_edge_metadata",
test_init_take_ownership_no_edge_metadata },
{ NULL, NULL },
Expand Down
10 changes: 10 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,11 @@ tsk_strerror_internal(int err)
"values for any single site. "
"(TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN)";
break;
case TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME:
ret = "Some mutation times are marked 'unknown' for a method that requires "
"no unknown times. (Use compute_mutation_times to add times?) "
"(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME)";
break;

/* Migration errors */
case TSK_ERR_UNSORTED_MIGRATIONS:
Expand Down Expand Up @@ -615,6 +620,11 @@ tsk_strerror_internal(int err)
"if an individual has nodes from more than one time. "
"(TSK_ERR_INDIVIDUAL_TIME_MISMATCH)";
break;

case TSK_ERR_EXTEND_EDGES_BAD_MAXITER:
ret = "Maximum number of iterations must be positive. "
"(TSK_ERR_EXTEND_EDGES_BAD_MAXITER)";
break;
}
return ret;
}
Expand Down
15 changes: 15 additions & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,11 @@ the edge on which it occurs, and wasn't TSK_UNKNOWN_TIME.
A single site had a mixture of known mutation times and TSK_UNKNOWN_TIME
*/
#define TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN -509
/**
Some mutations have TSK_UNKNOWN_TIME in an algorithm where that's
disallowed (use compute_mutation_times?).
*/
#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510
/** @} */

/**
Expand Down Expand Up @@ -865,6 +870,16 @@ An individual had nodes from more than one time
*/
#define TSK_ERR_INDIVIDUAL_TIME_MISMATCH -1704
/** @} */

/**
@defgroup EXTEND_EDGES_ERROR_GROUP Extend edges errors.
@{
*/
/**
Maximum iteration number (max_iter) must be positive.
*/
#define TSK_ERR_EXTEND_EDGES_BAD_MAXITER -1800
/** @} */
// clang-format on

/* This bit is 0 for any errors originating from kastore */
Expand Down
Loading
Loading