Skip to content

Commit 741164e

Browse files
Divergence matrix tree-by-tree algorithms
Implement the basic version of the divergence matrix operation using tree-by-tree algorithms, and provide an interface for parallelising along the genome.
1 parent 3d4fc51 commit 741164e

File tree

9 files changed

+2189
-25
lines changed

9 files changed

+2189
-25
lines changed

c/tests/test_trees.c

Lines changed: 352 additions & 1 deletion
Large diffs are not rendered by default.

c/tskit/trees.c

Lines changed: 557 additions & 12 deletions
Large diffs are not rendered by default.

c/tskit/trees.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,10 @@ int tsk_treeseq_f4(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
10031003
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
10041004
const double *windows, tsk_flags_t options, double *result);
10051005

1006+
/* Divergence matrix statistic, computed per window.
 *
 * The Python binding in this commit calls this with a `result` buffer holding
 * num_windows * num_samples * num_samples doubles, and treats any non-zero
 * return value as a library error.
 *
 * NOTE(review): the binding passes samples == NULL together with
 * num_samples == tsk_treeseq_get_num_samples(self) when the caller supplied
 * no sample list; presumably NULL means "use all samples" -- confirm against
 * the implementation in trees.c.
 * NOTE(review): `windows` is presumably an array of window breakpoints in the
 * same format as the other windowed statistics -- confirm.
 */
int tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_samples,
1007+
const tsk_id_t *samples, tsk_size_t num_windows, const double *windows,
1008+
tsk_flags_t options, double *result);
1009+
10061010
/****************************************************************************/
10071011
/* Tree */
10081012
/****************************************************************************/

python/_tskitmodule.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9638,6 +9638,78 @@ TreeSequence_f4(TreeSequence *self, PyObject *args, PyObject *kwds)
96389638
return TreeSequence_k_way_stat_method(self, args, kwds, 4, tsk_treeseq_f4);
96399639
}
96409640

9641+
static PyObject *
9642+
TreeSequence_divergence_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)
9643+
{
9644+
PyObject *ret = NULL;
9645+
static char *kwlist[] = { "windows", "samples", "mode", NULL };
9646+
PyArrayObject *result_array = NULL;
9647+
PyObject *windows = NULL;
9648+
PyObject *py_samples = Py_None;
9649+
char *mode = NULL;
9650+
PyArrayObject *windows_array = NULL;
9651+
PyArrayObject *samples_array = NULL;
9652+
tsk_flags_t options = 0;
9653+
npy_intp *shape, dims[3];
9654+
tsk_size_t num_samples, num_windows;
9655+
tsk_id_t *samples = NULL;
9656+
int err;
9657+
9658+
if (TreeSequence_check_state(self) != 0) {
9659+
goto out;
9660+
}
9661+
if (!PyArg_ParseTupleAndKeywords(
9662+
args, kwds, "O|Os", kwlist, &windows, &py_samples, &mode)) {
9663+
goto out;
9664+
}
9665+
num_samples = tsk_treeseq_get_num_samples(self->tree_sequence);
9666+
if (py_samples != Py_None) {
9667+
samples_array = (PyArrayObject *) PyArray_FROMANY(
9668+
py_samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
9669+
if (samples_array == NULL) {
9670+
goto out;
9671+
}
9672+
shape = PyArray_DIMS(samples_array);
9673+
samples = PyArray_DATA(samples_array);
9674+
num_samples = (tsk_size_t) shape[0];
9675+
}
9676+
if (parse_windows(windows, &windows_array, &num_windows) != 0) {
9677+
goto out;
9678+
}
9679+
dims[0] = num_windows;
9680+
dims[1] = num_samples;
9681+
dims[2] = num_samples;
9682+
result_array = (PyArrayObject *) PyArray_SimpleNew(3, dims, NPY_FLOAT64);
9683+
if (result_array == NULL) {
9684+
goto out;
9685+
}
9686+
if (parse_stats_mode(mode, &options) != 0) {
9687+
goto out;
9688+
}
9689+
// clang-format off
9690+
Py_BEGIN_ALLOW_THREADS
9691+
err = tsk_treeseq_divergence_matrix(
9692+
self->tree_sequence,
9693+
num_samples, samples,
9694+
num_windows, PyArray_DATA(windows_array),
9695+
options, PyArray_DATA(result_array));
9696+
Py_END_ALLOW_THREADS
9697+
// clang-format on
9698+
/* Clang-format insists on doing this in spite of the "off" instruction above */
9699+
if (err != 0)
9700+
{
9701+
handle_library_error(err);
9702+
goto out;
9703+
}
9704+
ret = (PyObject *) result_array;
9705+
result_array = NULL;
9706+
out:
9707+
Py_XDECREF(result_array);
9708+
Py_XDECREF(windows_array);
9709+
Py_XDECREF(samples_array);
9710+
return ret;
9711+
}
9712+
96419713
static PyObject *
96429714
TreeSequence_get_num_mutations(TreeSequence *self)
96439715
{
@@ -10346,6 +10418,10 @@ static PyMethodDef TreeSequence_methods[] = {
1034610418
.ml_meth = (PyCFunction) TreeSequence_f4,
1034710419
.ml_flags = METH_VARARGS | METH_KEYWORDS,
1034810420
.ml_doc = "Computes the f4 statistic." },
10421+
{ .ml_name = "divergence_matrix",
10422+
.ml_meth = (PyCFunction) TreeSequence_divergence_matrix,
10423+
.ml_flags = METH_VARARGS | METH_KEYWORDS,
10424+
.ml_doc = "Computes the pairwise divergence matrix." },
1034910425
{ .ml_name = "split_edges",
1035010426
.ml_meth = (PyCFunction) TreeSequence_split_edges,
1035110427
.ml_flags = METH_VARARGS | METH_KEYWORDS,

0 commit comments

Comments
 (0)