From 93ea66c24c973cb3d4a99451ed3c6c567751465e Mon Sep 17 00:00:00 2001 From: ande Date: Fri, 1 Jun 2018 12:12:17 +0100 Subject: [PATCH 01/84] added smooth c++ file --- src/fastSmooth.cpp | 153 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 src/fastSmooth.cpp diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp new file mode 100644 index 00000000..ac664ff5 --- /dev/null +++ b/src/fastSmooth.cpp @@ -0,0 +1,153 @@ +// Enable C++11 +// [[Rcpp::plugins(cpp11)]] +#include +#include +#include +#include +#include +using namespace Rcpp; + +// [[Rcpp::plugins("cpp11")]] + +//' @title +//' Compute EMD +////' +////' @param loc1 numeric vector. +////' @param val1 numeric vector. +////' @param loc2 numeric vector. +////' @param val2 numeric vector. +//' +//' @export +// [[Rcpp::export]] +double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,NumericVector loc2,NumericVector val2,double binWidth2) +{ + //init + double res=0; + double curVal1,curVal2; + double curPos; + double temp1; + int count; + int i,j,k; + //place start of windows before + //start of histogram so we can start the loop + // stores the result + res=0; + //TODO be worried about adding lots of small numbers + + // current location on hist 1 and hist 2 + i=0; + j=0; + double cdfLower=0; + double loc1SegStart=loc1[0]; + double loc1SegEnd=loc1[0]+binWidth1; + double loc1SegValStart=0; + double loc1SegValEnd=val1[0]; + double loc2SegStart=loc2[0]; + double loc2SegEnd=loc2[0]+binWidth2; + double loc2SegValStart=0; + double loc2SegValEnd=val2[0]; + double curStartVal; + double curEndVal; + double loc1Start; + double loc2Start; + double loc1End; + double loc2End; + double h; + res=0; + while (1) + { + // lets compute the area for these segments + if (loc1SegValStart Date: Fri, 1 Jun 2018 18:03:34 +0100 Subject: [PATCH 02/84] fixed a few annoying bugs in the implementation. 
This version appears to work, not sure if doing this from the other perspective would be better --- src/fastSmooth.cpp | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp index ac664ff5..eda90de4 100644 --- a/src/fastSmooth.cpp +++ b/src/fastSmooth.cpp @@ -54,6 +54,7 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer double loc2End; double h; res=0; + int count123=0; while (1) { // lets compute the area for these segments @@ -61,38 +62,37 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer { curStartVal=loc2SegValStart; loc2Start=loc2SegStart; - loc1Start=loc1SegStart+(loc1SegEnd-loc1SegValStart)*(loc2SegValStart-loc1SegValStart)/(loc1SegValEnd-loc1SegValStart); + loc1Start=loc1SegStart+(loc1SegEnd-loc1SegStart)*(loc2SegValStart-loc1SegValStart)/(loc1SegValEnd-loc1SegValStart); } else { curStartVal=loc1SegValStart; loc1Start=loc1SegStart; - loc2Start=loc2SegStart+(loc2SegEnd-loc2SegValStart)*(loc1SegValStart-loc2SegValStart)/(loc2SegValEnd-loc2SegValStart); + loc2Start=loc2SegStart+(loc2SegEnd-loc2SegStart)*(loc1SegValStart-loc2SegValStart)/(loc2SegValEnd-loc2SegValStart); } if (loc1SegValEnd Date: Sun, 3 Jun 2018 16:35:40 +0100 Subject: [PATCH 03/84] Modifications to add the c code into the function Also remoing some uneeded c code. 
--- NAMESPACE | 3 +++ R/RcppExports.R | 10 ++++++++++ R/emd.R | 45 ++++++++++++++++++++++++++++++++++++++++++--- src/RcppExports.cpp | 17 +++++++++++++++++ src/fastSmooth.cpp | 5 ++--- 5 files changed, 74 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 55867c79..7031e634 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(NetEmdSmooth) export(adaptive_breaks) export(area_between_dhist_ecmfs) export(as_smoothed_dhist) @@ -64,5 +65,7 @@ export(shift_dhist) export(simplify_graph) export(sort_dhist) export(zeros_to_ones) +import(Rcpp) +importFrom(Rcpp,evalCpp) importFrom(Rcpp,sourceCpp) useDynLib(netdist) diff --git a/R/RcppExports.R b/R/RcppExports.R index f85dc216..87d17d1b 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -44,3 +44,13 @@ emd_fast_no_smoothing <- function(locations1, values1, locations2, values2) { .Call('_netdist_emd_fast_no_smoothing', PACKAGE = 'netdist', locations1, values1, locations2, values2) } +#' @title +#' Compute EMD +NULL + +#' +#' @export +NetEmdSmooth <- function(loc1, val1, binWidth1, loc2, val2, binWidth2) { + .Call('_netdist_NetEmdSmooth', PACKAGE = 'netdist', loc1, val1, binWidth1, loc2, val2, binWidth2) +} + diff --git a/R/emd.R b/R/emd.R index 696e72cd..678be30f 100644 --- a/R/emd.R +++ b/R/emd.R @@ -42,6 +42,9 @@ min_emd <- function(dhist1, dhist2, method = "optimise") { #' @return Earth Mover's Distance between the two discrete histograms #' @export min_emd_optimise_fast <- function(dhist1, dhist2) { + print("there is me") + print(dhist1$smoothing_window_width) + print(dhist2$smoothing_window_width) # Can we run the C++ fast implementation (only works with no smoothing)? 
if ((dhist1$smoothing_window_width==0) && (dhist2$smoothing_window_width==0)) { @@ -80,11 +83,47 @@ min_emd_optimise_fast <- function(dhist1, dhist2) { min_offset <- soln$minimum return(list(min_emd = min_emd, min_offset = min_offset)) } - else + else #if ((dhist1$smoothing_window_width==1) && (dhist2$smoothing_window_width==1)) { - # Fall back on other version if either dhist is smoothed - return(min_emd_optimise(dhist1, dhist2)); + print("hi im here") + # Determine minimum and maximum offset of range in which histograms overlap + # (based on sliding histogram 1) + min_offset <- min(dhist2$locations) - max(dhist1$locations) + max_offset <- max(dhist2$locations) - min(dhist1$locations) + # Set lower and upper range for optimise algorithm to be somewhat wider than + # range defined by the minimum and maximum offset. This guards against a + # couple of issues that arise if the optimise range is exactly min_offset + # to max_offset + # 1) If lower and upper are equal, the optimise method will throw and error + # 2) It seems that optimise is not guaranteed to explore its lower and upper + # bounds, even in the case where one of them is the offset with minimum + # EMD + buffer <- 0.1 + min_offset <- min_offset - buffer + max_offset <- max_offset + buffer + # Define a single parameter function to minimise emd as a function of offset + val1 <- cumsum(dhist1$masses) + val2 <- cumsum(dhist2$masses) + val1 <- val1/val1[length(val1)] + val2 <- val2/val2[length(val2)] + loc1=dhist1$locations + loc2=dhist2$locations + binWidth1=dhist1$smoothing_window_width + binWidth2=dhist2$smoothing_window_width + count=0 + emd_offset <- function(offset) { + temp1<- NetEmdSmooth(loc1+offset,val1,binWidth1,loc2,val2,binWidth2) + temp1 + } + # Get solution from optimiser + soln <- stats::optimise(emd_offset, lower = min_offset, upper = max_offset, + tol = .Machine$double.eps*1000) + # Return mnimum EMD and associated offset + min_emd <- soln$objective + min_offset <- soln$minimum + 
return(list(min_emd = min_emd, min_offset = min_offset)) } + } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a9ac60fb..8db3345e 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -30,10 +30,27 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// NetEmdSmooth +double NetEmdSmooth(NumericVector loc1, NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2); +RcppExport SEXP _netdist_NetEmdSmooth(SEXP loc1SEXP, SEXP val1SEXP, SEXP binWidth1SEXP, SEXP loc2SEXP, SEXP val2SEXP, SEXP binWidth2SEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type loc1(loc1SEXP); + Rcpp::traits::input_parameter< NumericVector >::type val1(val1SEXP); + Rcpp::traits::input_parameter< double >::type binWidth1(binWidth1SEXP); + Rcpp::traits::input_parameter< NumericVector >::type loc2(loc2SEXP); + Rcpp::traits::input_parameter< NumericVector >::type val2(val2SEXP); + Rcpp::traits::input_parameter< double >::type binWidth2(binWidth2SEXP); + rcpp_result_gen = Rcpp::wrap(NetEmdSmooth(loc1, val1, binWidth1, loc2, val2, binWidth2)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_netdist_counts_from_observations", (DL_FUNC) &_netdist_counts_from_observations, 1}, {"_netdist_emd_fast_no_smoothing", (DL_FUNC) &_netdist_emd_fast_no_smoothing, 4}, + {"_netdist_NetEmdSmooth", (DL_FUNC) &_netdist_NetEmdSmooth, 6}, {NULL, NULL, 0} }; diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp index eda90de4..a0161147 100644 --- a/src/fastSmooth.cpp +++ b/src/fastSmooth.cpp @@ -55,18 +55,17 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer double h; res=0; int count123=0; + curStartVal=0; while (1) { // lets compute the area for these segments if (loc1SegValStart Date: Sun, 3 Jun 2018 17:05:30 +0100 Subject: [PATCH 04/84] removed 1 if statement out of the loop --- src/fastSmooth.cpp | 23 
+++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp index a0161147..a6f84274 100644 --- a/src/fastSmooth.cpp +++ b/src/fastSmooth.cpp @@ -56,19 +56,20 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer res=0; int count123=0; curStartVal=0; + if (loc1SegValStart Date: Tue, 5 Jun 2018 11:18:11 +0100 Subject: [PATCH 05/84] added speed test for smooth version --- R/net_emd_speed_benchmark.R | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/R/net_emd_speed_benchmark.R b/R/net_emd_speed_benchmark.R index 285e3e14..757811e8 100644 --- a/R/net_emd_speed_benchmark.R +++ b/R/net_emd_speed_benchmark.R @@ -1,3 +1,4 @@ +#' @export netEMDSpeedTest <- function() { ##load the data @@ -32,4 +33,41 @@ netEMDSpeedTest <- function() } } list(gddBuildTime = gddBuildTime, netEMDtime = netEMDtime) +} + +#' @export +netEMDSpeedTestSmooth <- function() +{ + ##load the data + source_dir <- system.file(file.path("extdata", "random"), package = "netdist") + print(source_dir) + edge_format = "ncol" + file_pattern = "" + # source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") + # edge_format = "ncol" + # file_pattern = ".txt" + graphs <- read_simple_graphs(source_dir = source_dir, format = edge_format, pattern = file_pattern) + n1=names(graphs) + lab1=c() + gddBuildTime=c() + netEMDtime=c() + for (i in 1:length(graphs)) + { + for (j in 1:(i)) + { + g1=graphs[[i]] + g2=graphs[[j]] + lab1=append(lab1,paste(n1[i],n1[j],sep=',')) + print(paste(n1[i],n1[j],sep=',')) + fulltimeStart=Sys.time() + gdd1=gdd(g1) + gdd2=gdd(g2) + netEMDStart=Sys.time() + net_emd(gdd1,gdd2,smoothing_window_width = 1) + endTime=Sys.time() + gddBuildTime=append(gddBuildTime,as.double(netEMDStart-fulltimeStart)) + netEMDtime=append(netEMDtime,as.double(endTime-netEMDStart)) + } + } + list(gddBuildTime = gddBuildTime, netEMDtime = netEMDtime) } \ No newline 
at end of file From 449609733a0e1e8fc387a3ee21983423d56a5bd4 Mon Sep 17 00:00:00 2001 From: ande Date: Mon, 18 Jun 2018 13:14:41 +0100 Subject: [PATCH 06/84] removed some of the print messages --- R/emd.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/emd.R b/R/emd.R index 678be30f..c83c05c0 100644 --- a/R/emd.R +++ b/R/emd.R @@ -42,9 +42,6 @@ min_emd <- function(dhist1, dhist2, method = "optimise") { #' @return Earth Mover's Distance between the two discrete histograms #' @export min_emd_optimise_fast <- function(dhist1, dhist2) { - print("there is me") - print(dhist1$smoothing_window_width) - print(dhist2$smoothing_window_width) # Can we run the C++ fast implementation (only works with no smoothing)? if ((dhist1$smoothing_window_width==0) && (dhist2$smoothing_window_width==0)) { @@ -85,7 +82,6 @@ min_emd_optimise_fast <- function(dhist1, dhist2) { } else #if ((dhist1$smoothing_window_width==1) && (dhist2$smoothing_window_width==1)) { - print("hi im here") # Determine minimum and maximum offset of range in which histograms overlap # (based on sliding histogram 1) min_offset <- min(dhist2$locations) - max(dhist1$locations) From 80cd6df8951662daae65334a8a3915655b56431b Mon Sep 17 00:00:00 2001 From: ande Date: Mon, 18 Jun 2018 13:15:16 +0100 Subject: [PATCH 07/84] added the new methods to the namespace for exports --- NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 7031e634..56e0b0fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,8 @@ export(min_emd) export(min_emd_exhaustive) export(min_emd_optimise) export(min_emd_optimise_fast) +export(netEMDSpeedTest) +export(netEMDSpeedTestSmooth) export(net_emd) export(net_emds_for_all_graphs) export(netdis) From c0a28e8aa72072e9a3cea0089760c041b69346c9 Mon Sep 17 00:00:00 2001 From: ande Date: Thu, 14 Nov 2019 09:30:56 +0000 Subject: [PATCH 08/84] commented and removing unneeded code --- src/fastSmooth.cpp | 62 ++++++++++++++++++++++++++++++++-------------- 1 file 
changed, 44 insertions(+), 18 deletions(-) diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp index a6f84274..66ebe166 100644 --- a/src/fastSmooth.cpp +++ b/src/fastSmooth.cpp @@ -37,27 +37,36 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer // current location on hist 1 and hist 2 i=0; j=0; - double cdfLower=0; - double loc1SegStart=loc1[0]; - double loc1SegEnd=loc1[0]+binWidth1; - double loc1SegValStart=0; - double loc1SegValEnd=val1[0]; + // hist1 variables + double loc1SegStart=loc1[0]; //- start of a Segment in x + double loc1SegEnd=loc1[0]+binWidth1; //- end of a Segment in x + double loc1SegValStart=0; //- start of a Segment in y + double loc1SegValEnd=val1[0]; //- end of a Segment in y + + // hist2 variables double loc2SegStart=loc2[0]; double loc2SegEnd=loc2[0]+binWidth2; double loc2SegValStart=0; double loc2SegValEnd=val2[0]; - double curStartVal; - double curEndVal; - double loc1Start; - double loc2Start; - double loc1End; - double loc2End; + + double curStartVal; // start value in y + double curEndVal; // end value in y + double loc1Start; // start value in x hist1 + double loc2Start; // start value in x hist2 + double loc1End; // end value in x hist1 + double loc2End; // end value in x hist2 double h; res=0; - int count123=0; + // set as 0 as at bottom of hist curStartVal=0; + + // need to know if first y segment ends with hist1 or hist2 + // Need to set the first start locations + // Commented this section as they are both set to zero + /* if (loc1SegValStart Date: Thu, 14 Nov 2019 09:55:06 +0000 Subject: [PATCH 09/84] figuring out what is wrong --- R/RcppExports.R | 2 +- src/fastSmooth.cpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 81fc83ae..1185ed09 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -32,6 +32,6 @@ NULL #' #' @export NetEmdSmooth <- function(loc1, val1, binWidth1, loc2, val2, binWidth2) { - .Call('_netdist_NetEmdSmooth', 
PACKAGE = 'netdist', loc1, val1, binWidth1, loc2, val2, binWidth2) + .Call(`_netdist_NetEmdSmooth`, loc1, val1, binWidth1, loc2, val2, binWidth2) } diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp index 66ebe166..5b5d99bf 100644 --- a/src/fastSmooth.cpp +++ b/src/fastSmooth.cpp @@ -63,7 +63,6 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer // need to know if first y segment ends with hist1 or hist2 // Need to set the first start locations // Commented this section as they are both set to zero - /* if (loc1SegValStart Date: Thu, 14 Nov 2019 10:18:49 +0000 Subject: [PATCH 10/84] removing changes to see if it helps --- src/fastSmooth.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp index 5b5d99bf..b2d43847 100644 --- a/src/fastSmooth.cpp +++ b/src/fastSmooth.cpp @@ -74,11 +74,10 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer loc1Start=loc1SegStart; loc2Start=loc2SegStart+(loc2SegEnd-loc2SegStart)*(loc1SegValStart-loc2SegValStart)/(loc2SegValEnd-loc2SegValStart); } - loc1Start=loc1SegStart; - loc2Start=loc2SegStart; + // loc1Start=loc1SegStart; + // loc2Start=loc2SegStart; while (1) { - std::cout << res << "\n"; // lets compute the area for this segments // // Case where hist1 ends first From 761898d6be4a7ec2c60ea007fc783e833e226e9d Mon Sep 17 00:00:00 2001 From: ande Date: Wed, 20 Nov 2019 14:34:44 +0000 Subject: [PATCH 11/84] Inital working version of fastEmdSmooth Very inefficient (double for loop), but it works and can easily be refined --- src/fastSmoothV2.cpp | 268 ++++++++++++++++++++++++++++++++++ tests/testthat/test_fastEMD.R | 154 +++++++++++++++++++ 2 files changed, 422 insertions(+) create mode 100644 src/fastSmoothV2.cpp create mode 100644 tests/testthat/test_fastEMD.R diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp new file mode 100644 index 00000000..c22d7d11 --- /dev/null +++ b/src/fastSmoothV2.cpp @@ 
-0,0 +1,268 @@ +// Enable C++11 +// [[Rcpp::plugins(cpp11)]] +#include +#include +#include +#include +#include +using namespace Rcpp; + + +double get_segment(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) +{ + double length; + length = end - start; + double topTriangle; + double topRectangle; + double bottomTriangle; + double midPoint; + double midValue; + double res=0; + if (val1_start > val2_start) + { + if (val1_end >= val2_end) + { + // They are in the same order no bowtie + // top triangle + std::cout << "\n Path1"; + topTriangle = 0.5*length*(val1_end-val1_start); + topRectangle = length*(val1_start-val2_start); + bottomTriangle = 0.5*length*(val2_end-val2_start); + return topTriangle+topRectangle-bottomTriangle; + } + else + { + std::cout << "\n Path2"; + //bowtie + // lets make this really simple as the compiler + // will combine the expressions as needed + midPoint = (val1_start-val2_start)/((val2_end-val2_start) - (val1_end-val1_start)); + midValue = val1_start + midPoint*(val1_end-val1_start); + midPoint = midPoint*length; + std::cout << "\n midPoint: " << midPoint << " midValue: " << midValue << "\n"; + + topTriangle = 0.5*midPoint*(midValue-val1_start); + topRectangle = midPoint*(val1_start-val2_start); + bottomTriangle = 0.5*midPoint*(midValue-val2_start); + + res = topTriangle+topRectangle-bottomTriangle; + + topTriangle = 0.5*(length-midPoint)*(val2_end-midValue); + topRectangle = 0; // midPoint*(val1_start-val2_start); + bottomTriangle = 0.5*(length - midPoint)*(val1_end - midValue); + res += topTriangle+topRectangle-bottomTriangle; + return res; + } + } + else + { + if (val1_end > val2_end) + { + std::cout << "\n Path3"; + //bowtie + midPoint = (val2_start-val1_start)/((val1_end-val1_start) - (val2_end-val2_start)); + midValue = val2_start + midPoint*(val2_end-val2_start); + midPoint = midPoint*length; + std::cout << "\n midPoint: " << midPoint << " midValue: " << midValue << "\n"; + + topTriangle = 
0.5*midPoint*(midValue-val2_start); + topRectangle = midPoint*(val2_start-val1_start); + bottomTriangle = 0.5*midPoint*(midValue-val1_start); + + res = topTriangle+topRectangle-bottomTriangle; + + topTriangle = 0.5*(length-midPoint)*(val1_end-midValue); + topRectangle = 0; // midPoint*(val1_start-val2_start); + bottomTriangle = 0.5*(length - midPoint)*(val2_end - midValue); + res += topTriangle+topRectangle-bottomTriangle; + return res; + + } + else // same order + { + std::cout << "\n Path4"; + topTriangle = 0.5*length*(val2_end-val2_start); + topRectangle = length*(val2_start-val1_start); + bottomTriangle = 0.5*length*(val1_end-val1_start); + return topTriangle+topRectangle-bottomTriangle; + } + } +} + + +//' @title +//' Compute EMD +////' +////' @param loc1 numeric vector. +////' @param val1 numeric vector. +////' @param loc2 numeric vector. +////' @param val2 numeric vector. +//' +//' @export +// [[Rcpp::export]] +double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,NumericVector loc2,NumericVector val2,double binWidth2) +{ + double loc1_start, loc1_end, loc2_start, loc2_end; + double val1_start, val1_end, val2_start, val2_end; + int index1,index2; + + // Index in both lists + index1 = 0; + index2 = 0; + + + // Loc1 data + loc1_start = loc1[0]; + loc1_end = loc1[0]; + val1_start = 0; + val1_end = 0; + + // Loc2 data + loc2_start = loc2[0]; + loc2_end = loc2[0]; + val2_start = 0; + val2_end = 0; + + double endX; + endX = std::min(loc2_end,loc1_end); + // while statement + double res=0; + + // Hist 1 + double curSeg1Loc1; + double curSeg1Loc2; + double curSeg1Val1; + double curSeg1Val2; + + // Hist 1 + double curSeg2Loc1; + double curSeg2Loc2; + double curSeg2Val1; + double curSeg2Val2; + + // need to iterate through regions of constant gradient + double h1,h2; + h1=0; + h2=0; + int i123,j123; + double tempStart; + double tempEnd; + double valStart1; + double valEnd1; + double valStart2; + double valEnd2; + double tempDouble; + for 
(index1=-1;index1(x1[length(v1)]+w1) + res1 = res1 + m1*v1[length(v1)] + + + + res2= rep(0,length(x)) + m1 = x2[1](w2+x2[length(v2)]) + res2 = res2 + m1*v2[length(v2)] + + abs(res1-res2) + } + f1 +} + +test_that("3 element test", { + + sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") + for (w1 in (1:10)/10.0) + { + for (w2 in (1:10)/10.0) + { + x1 <- c(1,2,3) + v1 <- c(0.25,0.70,1.00) + x2 <- c(1,2,3) + v2 <- c(0.25,0.70,1.00) + print(x1) + f1 <- makeFunction(x1,v1,w1,x2,v2,w2) + res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] + + res1 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) + + expect_lt(abs(res1-res2),10**(-3)) + } + } + }) + +test_that("2 element test w1=0.1, w2=0.2", { + + sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") + w1 = 0.1 + w2 = 0.2 + x1 <- c(1,2) + v1 <- c(0.25,0.75) + x2 <- c(1,2) + v2 <- c(0.5,1.00) + print(x1) + f1 <- makeFunction(x1,v1,w1,x2,v2,w2) + res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] + res1 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) + + expect_lt(abs(res1-res2),10**(-4)) +}) + + + +test_that("3 element test Mixture", { + + sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") + for (w1 in (1:10)/10.0) + { + for (w2 in (1:10)/10.0) + { + x1 <- c(1,2,3) + v1 <- c(0.65,0.70,1.00) + x2 <- c(1,2,3) + v2 <- c(0.25,0.70,1.00) + print(x1) + f1 <- makeFunction(x1,v1,w1,x2,v2,w2) + res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] + + res1 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) + + expect_lt(abs(res1-res2),10**(-3)) + } + } + }) + + + + +test_that("3 element test w1=0.1, w2=0.2", { + + sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") + w1 = 0.1 + w2 = 0.2 + x1 <- c(1,2,3) + v1 <- c(0.25,0.70,1.00) + x2 <- c(1,2,3) + v2 <- c(0.25,0.70,1.00) + print(x1) + f1 <- makeFunction(x1,v1,w1,x2,v2,w2) + res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] + res1 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) 
+ + expect_lt(abs(res1-res2),10**(-4)) + }) From b7a667436517b93b564dda0db84146626396e857 Mon Sep 17 00:00:00 2001 From: ande Date: Wed, 20 Nov 2019 14:42:37 +0000 Subject: [PATCH 12/84] fixed small bug --- tests/testthat/test_fastEMD.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test_fastEMD.R b/tests/testthat/test_fastEMD.R index 6d024356..9c5764cd 100644 --- a/tests/testthat/test_fastEMD.R +++ b/tests/testthat/test_fastEMD.R @@ -113,7 +113,6 @@ test_that("2 element test w1=0.1, w2=0.2", { test_that("3 element test Mixture", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") for (w1 in (1:10)/10.0) { for (w2 in (1:10)/10.0) @@ -138,7 +137,6 @@ test_that("3 element test Mixture", { test_that("3 element test w1=0.1, w2=0.2", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") w1 = 0.1 w2 = 0.2 x1 <- c(1,2,3) From 6b64eb2d7fe8f9e4c96e73acbb5e1f678135432a Mon Sep 17 00:00:00 2001 From: ande Date: Wed, 20 Nov 2019 14:50:07 +0000 Subject: [PATCH 13/84] missed a few internal paths used for testing --- tests/testthat/test_fastEMD.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test_fastEMD.R b/tests/testthat/test_fastEMD.R index 9c5764cd..3e17928c 100644 --- a/tests/testthat/test_fastEMD.R +++ b/tests/testthat/test_fastEMD.R @@ -72,7 +72,6 @@ makeFunction <- function(x1,v1,w1,x2,v2,w2) test_that("3 element test", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") for (w1 in (1:10)/10.0) { for (w2 in (1:10)/10.0) @@ -94,7 +93,6 @@ test_that("3 element test", { test_that("2 element test w1=0.1, w2=0.2", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") w1 = 0.1 w2 = 0.2 x1 <- c(1,2) From 40dddeb9a166245074e1eebf6d6a798cda72d476 Mon Sep 17 00:00:00 2001 From: ande Date: Wed, 20 Nov 2019 15:05:42 +0000 Subject: [PATCH 14/84] adding the V2 to namespace --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 7e023e49..24fa6913 
100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(NetEmdSmoothV2) export(NetEmdSmooth) export(adaptive_breaks) export(area_between_dhist_ecmfs) From b19b6fe2bcc478e49336ba5efb837d07b9908944 Mon Sep 17 00:00:00 2001 From: ande Date: Wed, 20 Nov 2019 15:11:52 +0000 Subject: [PATCH 15/84] updated versions of wrapper files --- R/RcppExports.R | 10 ++++++++++ src/RcppExports.cpp | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/R/RcppExports.R b/R/RcppExports.R index 1185ed09..e718b542 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -35,3 +35,13 @@ NetEmdSmooth <- function(loc1, val1, binWidth1, loc2, val2, binWidth2) { .Call(`_netdist_NetEmdSmooth`, loc1, val1, binWidth1, loc2, val2, binWidth2) } +#' @title +#' Compute EMD +NULL + +#' +#' @export +NetEmdSmoothV2 <- function(loc1, val1, binWidth1, loc2, val2, binWidth2) { + .Call(`_netdist_NetEmdSmoothV2`, loc1, val1, binWidth1, loc2, val2, binWidth2) +} + diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 2f77e0ea..99086611 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -46,6 +46,22 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// NetEmdSmoothV2 +double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2); +RcppExport SEXP _netdist_NetEmdSmoothV2(SEXP loc1SEXP, SEXP val1SEXP, SEXP binWidth1SEXP, SEXP loc2SEXP, SEXP val2SEXP, SEXP binWidth2SEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type loc1(loc1SEXP); + Rcpp::traits::input_parameter< NumericVector >::type val1(val1SEXP); + Rcpp::traits::input_parameter< double >::type binWidth1(binWidth1SEXP); + Rcpp::traits::input_parameter< NumericVector >::type loc2(loc2SEXP); + Rcpp::traits::input_parameter< NumericVector >::type val2(val2SEXP); + Rcpp::traits::input_parameter< double >::type 
binWidth2(binWidth2SEXP); + rcpp_result_gen = Rcpp::wrap(NetEmdSmoothV2(loc1, val1, binWidth1, loc2, val2, binWidth2)); + return rcpp_result_gen; +END_RCPP +} RcppExport SEXP run_testthat_tests(); @@ -53,6 +69,7 @@ static const R_CallMethodDef CallEntries[] = { {"_netdist_counts_from_observations", (DL_FUNC) &_netdist_counts_from_observations, 1}, {"_netdist_emd_fast_no_smoothing", (DL_FUNC) &_netdist_emd_fast_no_smoothing, 4}, {"_netdist_NetEmdSmooth", (DL_FUNC) &_netdist_NetEmdSmooth, 6}, + {"_netdist_NetEmdSmoothV2", (DL_FUNC) &_netdist_NetEmdSmoothV2, 6}, {"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 0}, {NULL, NULL, 0} }; From 15ee86ba11606daef02ab76ac2eff0657917a5bd Mon Sep 17 00:00:00 2001 From: ande Date: Wed, 20 Nov 2019 23:03:39 +0000 Subject: [PATCH 16/84] stable version --- src/fastSmoothV2.cpp | 6 +- tests/testthat/test_fastEMD.R | 122 ++++++++++++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 5 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index c22d7d11..9fe8a84f 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -152,10 +152,13 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num double valStart2; double valEnd2; double tempDouble; + int secondStart=-1; + int h; for (index1=-1;index10.001) + { + browser() + } + # Swapped to percentage error + expect_lt(abs(res1-res2),10**(-3)) + + } + } + } + } + }) test_that("3 element test w1=0.1, w2=0.2", { + sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") w1 = 0.1 w2 = 0.2 x1 <- c(1,2,3) @@ -143,7 +257,7 @@ test_that("3 element test w1=0.1, w2=0.2", { v2 <- c(0.25,0.70,1.00) print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) - res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] + res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.0000000001)[[1]] res1 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) expect_lt(abs(res1-res2),10**(-4)) From d959a79c94dd9f33ef05d854923b3b0d5a12f917 Mon Sep 17 
00:00:00 2001 From: ande Date: Thu, 21 Nov 2019 11:58:29 +0000 Subject: [PATCH 17/84] updated iupdated --- src/fastSmoothV2.cpp | 187 ++++++++++++++++++++++++++++--------------- 1 file changed, 123 insertions(+), 64 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 9fe8a84f..788a3458 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -24,7 +24,7 @@ double get_segment(double start,double end,double val1_start,double val1_end,dou { // They are in the same order no bowtie // top triangle - std::cout << "\n Path1"; +// std::cout << "\n Path1"; topTriangle = 0.5*length*(val1_end-val1_start); topRectangle = length*(val1_start-val2_start); bottomTriangle = 0.5*length*(val2_end-val2_start); @@ -32,14 +32,14 @@ double get_segment(double start,double end,double val1_start,double val1_end,dou } else { - std::cout << "\n Path2"; +// std::cout << "\n Path2"; //bowtie // lets make this really simple as the compiler // will combine the expressions as needed midPoint = (val1_start-val2_start)/((val2_end-val2_start) - (val1_end-val1_start)); midValue = val1_start + midPoint*(val1_end-val1_start); midPoint = midPoint*length; - std::cout << "\n midPoint: " << midPoint << " midValue: " << midValue << "\n"; +// std::cout << "\n midPoint: " << midPoint << " midValue: " << midValue << "\n"; topTriangle = 0.5*midPoint*(midValue-val1_start); topRectangle = midPoint*(val1_start-val2_start); @@ -58,12 +58,12 @@ double get_segment(double start,double end,double val1_start,double val1_end,dou { if (val1_end > val2_end) { - std::cout << "\n Path3"; +// std::cout << "\n Path3"; //bowtie midPoint = (val2_start-val1_start)/((val1_end-val1_start) - (val2_end-val2_start)); midValue = val2_start + midPoint*(val2_end-val2_start); midPoint = midPoint*length; - std::cout << "\n midPoint: " << midPoint << " midValue: " << midValue << "\n"; +// std::cout << "\n midPoint: " << midPoint << " midValue: " << midValue << "\n"; topTriangle = 
0.5*midPoint*(midValue-val2_start); topRectangle = midPoint*(val2_start-val1_start); @@ -80,7 +80,7 @@ double get_segment(double start,double end,double val1_start,double val1_end,dou } else // same order { - std::cout << "\n Path4"; +// std::cout << "\n Path4"; topTriangle = 0.5*length*(val2_end-val2_start); topRectangle = length*(val2_start-val1_start); bottomTriangle = 0.5*length*(val1_end-val1_start); @@ -154,61 +154,134 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num double tempDouble; int secondStart=-1; int h; + +auto start = std::chrono::high_resolution_clock::now(); + std::chrono::duration t1=start-start; +int count=0; +double avCount=0; for (index1=-1;index10) + { + if (index2curSeg1Loc2) + {break;} + if (tempStartcurSeg1Loc2) + {break;} if (tempStart Date: Thu, 21 Nov 2019 15:55:38 +0000 Subject: [PATCH 18/84] updated test --- tests/testthat/test_fastEMD.R | 82 ++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 20 deletions(-) diff --git a/tests/testthat/test_fastEMD.R b/tests/testthat/test_fastEMD.R index 3e17928c..71c1eed7 100644 --- a/tests/testthat/test_fastEMD.R +++ b/tests/testthat/test_fastEMD.R @@ -10,26 +10,28 @@ makeFunction <- function(x1,v1,w1,x2,v2,w2) m2 = x<=(x1[1]+w1) m3 = m1*m2 res1 = res1 + m3*(x-x1[1])*(v1[1]-0)/w1 - - m1 = (x1[1]+w1)1) { - m1 = x1[i](x1[length(v1)]+w1) res1 = res1 + m1*v1[length(v1)] @@ -42,11 +44,13 @@ makeFunction <- function(x1,v1,w1,x2,v2,w2) m3 = m1*m2 res2 = m3*(x-x2[1])*(v2[1]-0)/w2 - - m1 = (x2[1]+w2)1) + { + m1 = (x2[1]+w2) Date: Thu, 21 Nov 2019 16:21:28 +0000 Subject: [PATCH 19/84] updated tests --- tests/testthat/test_emd.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test_emd.R b/tests/testthat/test_emd.R index 58e39c22..59145923 100644 --- a/tests/testthat/test_emd.R +++ b/tests/testthat/test_emd.R @@ -14,7 +14,7 @@ test_that("cost_matrix returns zeros along diagonal when both sets of bin bin_centres2 
<- bin_centres1 expected <- rep(0, length(bin_centres1)) expect_equal(diag(cost_matrix(bin_centres1, bin_centres2)), expected) - }) + }) test_that("cost_matrix returns zeros along diagonal and taxicab distance from all zeros for all other elements when both sets of bin locations are @@ -24,7 +24,7 @@ test_that("cost_matrix returns zeros along diagonal and taxicab distance from num_bins <- length(bin_centres1) expected <- toeplitz(1:num_bins)-1 expect_equal(cost_matrix(bin_centres1, bin_centres2), expected) - }) + }) test_that("cost_matrix is correct size when the two histograms are of different lengths", { @@ -35,7 +35,7 @@ test_that("cost_matrix is correct size when the two histograms are of different expect_equal(nrow(cm), length(bin_centres1)) expect_equal(ncol(cm), length(bin_centres2)) - }) + }) context("EMD: EMD") # EMD: Property-based tests @@ -53,7 +53,7 @@ test_that("EMD methods return 0 when comparing a 1D feature distribution to bin_centres1, bin_centres2), expected) expect_equal(emd_cs(histogram1, histogram2), expected) expect_equal(emd(histogram1, histogram2), expected) - }) + }) test_that("EMD methods return numBins/2 when offsetting a symmetric discrete triangle distribution by 1", { @@ -170,7 +170,7 @@ test_that("EMD methods return numBins/2 when offsetting a symmetric discrete expect_equal(emd_cs(histogram1, histogram2), expected) expect_equal(emd(histogram1, histogram2), expected) - }) + }) test_that("EMD methods return same result for densely and sparsely specified bins", { @@ -203,7 +203,7 @@ test_that("EMD methods return same result for densely and sparsely specified emd_cs(sparse_histogram1,sparse_histogram2)) expect_equal(emd(dense_histogram1, dense_histogram2), emd(sparse_histogram1, sparse_histogram2)) - }) + }) test_that("EMD methods return same result when order of densely specified bins is changed", { From 5ed553ef63c5efa8168c36697b31a73a747ce9e2 Mon Sep 17 00:00:00 2001 From: ande Date: Fri, 22 Nov 2019 11:50:14 +0000 Subject: [PATCH 
20/84] added old failure case. as a test --- R/emd.R | 25 +++++++++++++------------ tests/testthat/test_fastEMD.R | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/R/emd.R b/R/emd.R index 5219dd49..3d486c81 100644 --- a/R/emd.R +++ b/R/emd.R @@ -82,10 +82,19 @@ min_emd_optimise_fast <- function(dhist1, dhist2) { } else #if ((dhist1$smoothing_window_width==1) && (dhist2$smoothing_window_width==1)) { + val1 <- cumsum(dhist1$masses) + val2 <- cumsum(dhist2$masses) + val1 <- val1/val1[length(val1)] + val2 <- val2/val2[length(val2)] + loc1=dhist1$locations + loc2=dhist2$locations + binWidth1=dhist1$smoothing_window_width + binWidth2=dhist2$smoothing_window_width + count=0 # Determine minimum and maximum offset of range in which histograms overlap # (based on sliding histogram 1) - min_offset <- min(dhist2$locations) - max(dhist1$locations) - max_offset <- max(dhist2$locations) - min(dhist1$locations) + min_offset <- min(dhist2$locations) - max(dhist1$locations) - max(binWidth1,binWidth2) + max_offset <- max(dhist2$locations) - min(dhist1$locations) + max(binWidth1,binWidth2) # Set lower and upper range for optimise algorithm to be somewhat wider than # range defined by the minimum and maximum offset. 
This guards against a # couple of issues that arise if the optimise range is exactly min_offset @@ -97,18 +106,10 @@ min_emd_optimise_fast <- function(dhist1, dhist2) { buffer <- 0.1 min_offset <- min_offset - buffer max_offset <- max_offset + buffer + # Define a single parameter function to minimise emd as a function of offset - val1 <- cumsum(dhist1$masses) - val2 <- cumsum(dhist2$masses) - val1 <- val1/val1[length(val1)] - val2 <- val2/val2[length(val2)] - loc1=dhist1$locations - loc2=dhist2$locations - binWidth1=dhist1$smoothing_window_width - binWidth2=dhist2$smoothing_window_width - count=0 emd_offset <- function(offset) { - temp1<- NetEmdSmooth(loc1+offset,val1,binWidth1,loc2,val2,binWidth2) + temp1<- NetEmdSmoothV2(loc1+offset,val1,binWidth1,loc2,val2,binWidth2) temp1 } # Get solution from optimiser diff --git a/tests/testthat/test_fastEMD.R b/tests/testthat/test_fastEMD.R index 23c3df36..8af30326 100644 --- a/tests/testthat/test_fastEMD.R +++ b/tests/testthat/test_fastEMD.R @@ -304,3 +304,18 @@ test_that("3 element test w1=0.1, w2=0.2", { expect_lt(abs(res1-res2),10**(-4)) }) + +test_that("Old failure case", { + d1 = list() + attr(d1,'class') <- "dhist" + d1$locations <- 0 + d1$masses <- 1000 + d2 = list() + attr(d2,'class') <- "dhist" + d2$locations <- c(0,1,2,3) + d2$masses <- c(8634,1242,114,10) + sq1 <- net_emd_single_pair(d1,d2,method='optimise',smoothing_window_width = 1) + sq2 <- net_emd_single_pair(d1,d2,method='optimiseRonly',smoothing_window_width = 1) + expect_lt(abs(sq1$min_emd-sq2$min_emd),10**(-4)) +}) + \ No newline at end of file From cac3bab0640c1876d17ce6f5405c271ecee93560 Mon Sep 17 00:00:00 2001 From: ande Date: Fri, 22 Nov 2019 13:33:05 +0000 Subject: [PATCH 21/84] first real fast version --- src/fastSmoothV2.cpp | 98 +++++++++----------------------------------- 1 file changed, 20 insertions(+), 78 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 788a3458..383ab488 100644 --- a/src/fastSmoothV2.cpp +++ 
b/src/fastSmoothV2.cpp @@ -144,7 +144,7 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num double h1,h2; h1=0; h2=0; - int i123,j123; + int i123; double tempStart; double tempEnd; double valStart1; @@ -155,15 +155,10 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num int secondStart=-1; int h; -auto start = std::chrono::high_resolution_clock::now(); - std::chrono::duration t1=start-start; -int count=0; -double avCount=0; for (index1=-1;index10) - { - if (index20) + { + if (index2curSeg1Loc2) - {break;} if (tempStartcurSeg1Loc2) - {break;} if (tempStart Date: Fri, 22 Nov 2019 14:20:48 +0000 Subject: [PATCH 22/84] working partially cleaned version next step pulling edge case out of loop --- src/fastSmoothV2.cpp | 184 +++++++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 85 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 383ab488..871cb07b 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -8,7 +8,7 @@ using namespace Rcpp; -double get_segment(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) +inline double get_segment(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) { double length; length = end - start; @@ -89,6 +89,16 @@ double get_segment(double start,double end,double val1_start,double val1_end,dou } } +inline double get_segment_constrained(double start,double end, double seg1L1, double seg1L2, double seg2L1, double seg2L2, double seg1V1, double seg1V2, double seg2V1, double seg2V2) +{ + //We have a valid range + double valStart1, valEnd1, valStart2, valEnd2; + valStart1 = seg1V1 + (seg1V2-seg1V1)*(start - seg1L1)/(seg1L2 - seg1L1); + valEnd1 = seg1V1 + (seg1V2-seg1V1)*(end - seg1L1)/(seg1L2 - seg1L1); + valStart2 = seg2V1 + (seg2V2-seg2V1)*(start - seg2L1)/(seg2L2 - seg2L1); + valEnd2 = seg2V1 + (seg2V2-seg2V1)*(end - seg2L1)/(seg2L2 - seg2L1); + 
return get_segment(start,end,valStart1,valEnd1,valStart2,valEnd2); +} //' @title //' Compute EMD @@ -102,30 +112,8 @@ double get_segment(double start,double end,double val1_start,double val1_end,dou // [[Rcpp::export]] double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,NumericVector loc2,NumericVector val2,double binWidth2) { - double loc1_start, loc1_end, loc2_start, loc2_end; - double val1_start, val1_end, val2_start, val2_end; int index1,index2; - // Index in both lists - index1 = 0; - index2 = 0; - - - // Loc1 data - loc1_start = loc1[0]; - loc1_end = loc1[0]; - val1_start = 0; - val1_end = 0; - - // Loc2 data - loc2_start = loc2[0]; - loc2_end = loc2[0]; - val2_start = 0; - val2_end = 0; - - double endX; - endX = std::min(loc2_end,loc1_end); - // while statement double res=0; // Hist 1 @@ -141,57 +129,97 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num double curSeg2Val2; // need to iterate through regions of constant gradient - double h1,h2; - h1=0; - h2=0; int i123; double tempStart; double tempEnd; - double valStart1; - double valEnd1; - double valStart2; - double valEnd2; - double tempDouble; - int secondStart=-1; - int h; + double secondStart=-1; + + double maxLoc = std::max(loc1[loc1.size()-1] +binWidth1,loc2[loc2.size()-1]+binWidth2 ); + double minLoc = std::min(loc1[0],loc2[0]); + + if (loc2[0]0) @@ -207,7 +235,7 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num } if (index2==-1) { - curSeg2Loc1=std::min(loc1[0],loc2[0]); + curSeg2Loc1=minLoc; curSeg2Loc2=loc2[0]; curSeg2Val1=0; curSeg2Val2=0; @@ -227,14 +255,7 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num tempEnd = std::min(curSeg1Loc2,curSeg2Loc2); if (tempStart Date: Mon, 25 Nov 2019 08:35:51 +0000 Subject: [PATCH 23/84] current working version --- src/fastSmoothV2.cpp | 226 +++++++++++++++++++------------------------ 1 file changed, 98 insertions(+), 128 deletions(-) 
diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 871cb07b..46166ed5 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -7,7 +7,7 @@ #include using namespace Rcpp; - +//compute segment inline double get_segment(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) { double length; @@ -23,8 +23,6 @@ inline double get_segment(double start,double end,double val1_start,double val1_ if (val1_end >= val2_end) { // They are in the same order no bowtie - // top triangle -// std::cout << "\n Path1"; topTriangle = 0.5*length*(val1_end-val1_start); topRectangle = length*(val1_start-val2_start); bottomTriangle = 0.5*length*(val2_end-val2_start); @@ -32,7 +30,6 @@ inline double get_segment(double start,double end,double val1_start,double val1_ } else { -// std::cout << "\n Path2"; //bowtie // lets make this really simple as the compiler // will combine the expressions as needed @@ -89,15 +86,26 @@ inline double get_segment(double start,double end,double val1_start,double val1_ } } -inline double get_segment_constrained(double start,double end, double seg1L1, double seg1L2, double seg2L1, double seg2L2, double seg1V1, double seg1V2, double seg2V1, double seg2V2) +// cut down and compute segment +inline double get_segment_constrained(double seg1L1, double seg1L2, double seg2L1, double seg2L2, double seg1V1, double seg1V2, double seg2V1, double seg2V2) { - //We have a valid range - double valStart1, valEnd1, valStart2, valEnd2; - valStart1 = seg1V1 + (seg1V2-seg1V1)*(start - seg1L1)/(seg1L2 - seg1L1); - valEnd1 = seg1V1 + (seg1V2-seg1V1)*(end - seg1L1)/(seg1L2 - seg1L1); - valStart2 = seg2V1 + (seg2V2-seg2V1)*(start - seg2L1)/(seg2L2 - seg2L1); - valEnd2 = seg2V1 + (seg2V2-seg2V1)*(end - seg2L1)/(seg2L2 - seg2L1); - return get_segment(start,end,valStart1,valEnd1,valStart2,valEnd2); + //We have a valid range + double valStart1, valEnd1, valStart2, valEnd2; + double start,end; + start = std::max(seg1L1,seg2L1); + end = 
std::min(seg1L2,seg2L2); + if (start0) - { - if (index2 Date: Mon, 25 Nov 2019 14:00:00 +0000 Subject: [PATCH 24/84] updated file naming --- src/{fastSmoothV2.cpp => fastSmoothV2_old.cpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{fastSmoothV2.cpp => fastSmoothV2_old.cpp} (100%) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2_old.cpp similarity index 100% rename from src/fastSmoothV2.cpp rename to src/fastSmoothV2_old.cpp From 9c99088e587504230f67a28e85fa300a2e4249b6 Mon Sep 17 00:00:00 2001 From: ande Date: Mon, 25 Nov 2019 14:01:00 +0000 Subject: [PATCH 25/84] new fast smooth file --- src/fastSmoothV2.cpp | 289 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 289 insertions(+) create mode 100644 src/fastSmoothV2.cpp diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp new file mode 100644 index 00000000..587310c2 --- /dev/null +++ b/src/fastSmoothV2.cpp @@ -0,0 +1,289 @@ +// Enable C++11 +// [[Rcpp::plugins(cpp11)]] +#include +#include +#include +#include +#include +using namespace Rcpp; + +//compute segment +inline double get_segment(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) +{ + double length; + length = end - start; + double topTriangle; + double topRectangle; + double bottomTriangle; + double midPoint; + double midValue; + double res=0; + if (val1_start > val2_start) + { + if (val1_end >= val2_end) + { + // They are in the same order no bowtie + // seg1 is above seg2 + // triangle of seg1 + topTriangle = 0.5*length*(val1_end-val1_start); + // rectangle between seg1 and seg2 + topRectangle = length*(val1_start-val2_start); + // triangle of seg2 (to be removed) + bottomTriangle = 0.5*length*(val2_end-val2_start); + return topTriangle+topRectangle-bottomTriangle; + } + else + { + //bowtie + // lets make this really simple as the compiler + // will combine the expressions as needed + midPoint = (val1_start-val2_start)/((val2_end-val2_start) - (val1_end-val1_start)); + 
midValue = val1_start + midPoint*(val1_end-val1_start); + midPoint = midPoint*length; + + topTriangle = 0.5*midPoint*(midValue-val1_start); + topRectangle = midPoint*(val1_start-val2_start); + bottomTriangle = 0.5*midPoint*(midValue-val2_start); + + res = topTriangle+topRectangle-bottomTriangle; + + topTriangle = 0.5*(length-midPoint)*(val2_end-midValue); + topRectangle = 0; // midPoint*(val1_start-val2_start); + bottomTriangle = 0.5*(length - midPoint)*(val1_end - midValue); + res += topTriangle+topRectangle-bottomTriangle; + return res; + } + } + else + { + if (val1_end > val2_end) + { + //bowtie + // Find the point where they cross. + // (Solution of linear equations) + midPoint = (val2_start-val1_start)/((val1_end-val1_start) - (val2_end-val2_start)); + midValue = val2_start + midPoint*(val2_end-val2_start); + midPoint = midPoint*length; + + topTriangle = 0.5*midPoint*(midValue-val2_start); + topRectangle = midPoint*(val2_start-val1_start); + bottomTriangle = 0.5*midPoint*(midValue-val1_start); + + res = topTriangle+topRectangle-bottomTriangle; + + topTriangle = 0.5*(length-midPoint)*(val1_end-midValue); + topRectangle = 0; // midPoint*(val1_start-val2_start); + bottomTriangle = 0.5*(length - midPoint)*(val2_end - midValue); + res += topTriangle+topRectangle-bottomTriangle; + return res; + + } + else // same order + { + // seg2 is above seg1 + // Triangle seg2 above seg1 + topTriangle = 0.5*length*(val2_end-val2_start); + // rectangle between seg2 and seg1 + topRectangle = length*(val2_start-val1_start); + // Seg1 triangle to be removed + bottomTriangle = 0.5*length*(val1_end-val1_start); + return topTriangle+topRectangle-bottomTriangle; + } + } +} + +// cut down and compute segment +inline double get_segment_constrained(double seg1L1, double seg1L2, double seg2L1, double seg2L2, double seg1V1, double seg1V2, double seg2V1, double seg2V2) +{ + //We have a valid range + double valStart1, valEnd1, valStart2, valEnd2; + double start,end; + start = 
std::max(seg1L1,seg2L1); + end = std::min(seg1L2,seg2L2); + if (startcurSeg2Loc3) + {break;} + } + } + else + { + // loc2 starts before loc1 so lets deal with those segments first + // Fix the position of Seg2 and then interate over Seg1 until we have all + // of the segments of Seg1 before Seg2 starts. + curSeg2Loc1=minLoc; + curSeg2Loc2=loc2[0]; + curSeg2Loc3=loc2[0]; + curSeg2Val1=0; + curSeg2Val2=0; + curSeg1Val2=0; + for (index1=0;index1curSeg1Loc3) + {break;} + } + } + // Add both the overlapping sections and the non overlapping section on the right + for (index1=0;index1curSeg1Loc3) + {break;} + res += get_double_segment_constrained(curSeg1Loc1,curSeg1Loc2,curSeg1Loc3,curSeg1Val1,curSeg1Val2,curSeg2Loc1,curSeg2Loc2,curSeg2Loc3,curSeg2Val1,curSeg2Val2); + } + } + return res; +} From 10d7de8a6453d61d3aff636376835ccc7b96b8d3 Mon Sep 17 00:00:00 2001 From: ande Date: Mon, 25 Nov 2019 14:01:51 +0000 Subject: [PATCH 26/84] changed name of old version --- src/fastSmoothV2_old.cpp | 42 +++++++++++++++++++---- tests/testthat/test_fastEMD.R | 64 ++++++++++++++++++++++++++++------- 2 files changed, 87 insertions(+), 19 deletions(-) diff --git a/src/fastSmoothV2_old.cpp b/src/fastSmoothV2_old.cpp index 46166ed5..8eb72d8c 100644 --- a/src/fastSmoothV2_old.cpp +++ b/src/fastSmoothV2_old.cpp @@ -118,7 +118,7 @@ inline double get_segment_constrained(double seg1L1, double seg1L2, double seg2L //' //' @export // [[Rcpp::export]] -double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,NumericVector loc2,NumericVector val2,double binWidth2) +double NetEmdSmoothV2_old(NumericVector loc1,NumericVector val1,double binWidth1,NumericVector loc2,NumericVector val2,double binWidth2) { int index1,index2; @@ -138,8 +138,6 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num // need to iterate through regions of constant gradient int i123; - double tempStart; - double tempEnd; double secondStart=-1; double maxLoc = 
std::max(loc1[loc1.size()-1] +binWidth1,loc2[loc2.size()-1]+binWidth2 ); @@ -178,9 +176,41 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num if (curSeg1Loc20.001) - { - browser() - } + # if (abs(res1-res2)>0.001) + # { + # browser() + # } # Swapped to percentage error expect_lt(abs(res1-res2),10**(-3)) @@ -290,7 +300,7 @@ test_that("many element test Mixture ", { test_that("3 element test w1=0.1, w2=0.2", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV2.cpp") + sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") w1 = 0.1 w2 = 0.2 x1 <- c(1,2,3) @@ -318,4 +328,32 @@ test_that("Old failure case", { sq2 <- net_emd_single_pair(d1,d2,method='optimiseRonly',smoothing_window_width = 1) expect_lt(abs(sq1$min_emd-sq2$min_emd),10**(-4)) }) - \ No newline at end of file + + +test_that("Old failure case 2", { + x1 = c(0.2862492, 0.6917626) + v1 = c(0.6519357, 1.0000000) + w1 = 0.2027567 + x2 = c(0.9990626, 1.4882579) + v2 = c(0.6519357, 1.0000000) + w2 = 0.2445976 + f1 <- makeFunction(x1,v1,w1,x2,v2,w2) + res1 <- integrate(f1,min(min(x1),min(x2)),max(max(x1),max(x2))+max(w1,w2))[[1]] + res2 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) + expect_lt(abs(res2-res1),10**(-4)) +}) + + + +test_that("Old Failure Case 2 reverse", { + x2 = c(0.2862492, 0.6917626) + v2 = c(0.6519357, 1.0000000) + w2 = 0.2027567 + x1 = c(0.9990626, 1.4882579) + v1 = c(0.6519357, 1.0000000) + w1 = 0.2445976 + f1 <- makeFunction(x1,v1,w1,x2,v2,w2) + res1 <- integrate(f1,min(min(x1),min(x2)),max(max(x1),max(x2))+max(w1,w2))[[1]] + res2 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) + expect_lt(abs(res2-res1),10**(-4)) +}) \ No newline at end of file From cc76a3bf10ff10326a66e870dfb95312738a4296 Mon Sep 17 00:00:00 2001 From: ande Date: Mon, 25 Nov 2019 14:46:21 +0000 Subject: [PATCH 27/84] removed sourceCpp --- tests/testthat/test_fastEMD.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/testthat/test_fastEMD.R b/tests/testthat/test_fastEMD.R index 
e062ec4a..459b7aec 100644 --- a/tests/testthat/test_fastEMD.R +++ b/tests/testthat/test_fastEMD.R @@ -86,7 +86,6 @@ getVal <- function(x1,v1,w1,x2,v2,w2) test_that("3 element test", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") for (w1 in (1:10)/10.0) { for (w2 in (1:10)/10.0) @@ -108,7 +107,6 @@ test_that("3 element test", { test_that("2 element test w1=0.1, w2=0.2", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") w1 = 0.1 w2 = 0.2 x1 <- c(1,2) @@ -164,7 +162,6 @@ test_that("1 element vs many test Mixture", { test_that("3 element test Mixture", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") for (w1 in (1:10)/10.0) { for (w2 in (1:10)/10.0) @@ -185,7 +182,6 @@ test_that("3 element test Mixture", { }) test_that("3 element test Mixture MidPoint", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") w1 = 1 w2 = 1 for (v1_2 in (1:10)/10.0) @@ -209,7 +205,6 @@ test_that("3 element test Mixture MidPoint", { test_that("3 element test Mixture StartPoint", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") w1 = 1 w2 = 1 for (v1_1 in (1:5)/10.0) @@ -233,7 +228,6 @@ test_that("3 element test Mixture StartPoint", { test_that("3 element test Mixture StartLoc", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") w1 = 1 w2 = 1 for (x1_1 in (1:9)/10.0) @@ -257,7 +251,6 @@ test_that("3 element test Mixture StartLoc", { test_that("many element test Mixture ", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") w1 = 1 w2 = 1 for (i in (2:10)*1) @@ -300,7 +293,6 @@ test_that("many element test Mixture ", { test_that("3 element test w1=0.1, w2=0.2", { - sourceCpp("~/Documents/network-comparison/src/fastSmoothV3.cpp") w1 = 0.1 w2 = 0.2 x1 <- c(1,2,3) From aa1f6e82496f2c14317bf3d318c3b2b416bdcde9 Mon Sep 17 00:00:00 2001 From: ande Date: Mon, 25 Nov 2019 15:24:40 +0000 Subject: [PATCH 28/84] fixed introduced bug --- src/fastSmoothV2.cpp | 8 ++++++-- 
tests/testthat/test_fastEMD.R | 10 ---------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 587310c2..b87cf747 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -237,7 +237,12 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num {break;} } } + std::cout << "res = " << res <<"\n"; // Add both the overlapping sections and the non overlapping section on the right + // Note we reiterate over the first few sections loc1 + // Could store where we are upto from above to save time + // Reset Val counter + curSeg1Val2=0; for (index1=0;index1curSeg1Loc3) {break;} - res += get_double_segment_constrained(curSeg1Loc1,curSeg1Loc2,curSeg1Loc3,curSeg1Val1,curSeg1Val2,curSeg2Loc1,curSeg2Loc2,curSeg2Loc3,curSeg2Val1,curSeg2Val2); } } return res; diff --git a/tests/testthat/test_fastEMD.R b/tests/testthat/test_fastEMD.R index 459b7aec..cd091401 100644 --- a/tests/testthat/test_fastEMD.R +++ b/tests/testthat/test_fastEMD.R @@ -94,7 +94,6 @@ test_that("3 element test", { v1 <- c(0.25,0.70,1.00) x2 <- c(1,2,3) v2 <- c(0.25,0.70,1.00) - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] @@ -113,7 +112,6 @@ test_that("2 element test w1=0.1, w2=0.2", { v1 <- c(0.25,0.75) x2 <- c(1,2) v2 <- c(0.5,1.00) - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) res2 <- integrate(f1,0,max(x2[2],x1[2])+max(w1,w2),abs.tol=0.000000001)[[1]] res1 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) @@ -130,7 +128,6 @@ test_that("1 element at 0 vs many test Mixture", { v1 <- c(1.00) x2 <- 1:w1 v2 <- (1:w1)/w1 - print(x1) f1 <- makeFunction(x1,v1,1,x2,v2,1) res2 <- integrate(f1,0,w1+1,abs.tol=0.000000001)[[1]] @@ -149,7 +146,6 @@ test_that("1 element vs many test Mixture", { v1 <- c(1.00) x2 <- 1:w1 v2 <- (1:w1)/w1 - print(x1) f1 <- makeFunction(x1,v1,1,x2,v2,1) res2 <- integrate(f1,0,w1+1,abs.tol=0.000000001)[[1]] @@ -170,7 +166,6 @@ 
test_that("3 element test Mixture", { v1 <- c(0.65,0.70,1.00) x2 <- c(1,2,3) v2 <- c(0.25,0.70,1.00) - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] @@ -192,7 +187,6 @@ test_that("3 element test Mixture MidPoint", { v1 <- c(0.1,v1_2,1.00) x2 <- c(1,2,3) v2 <- c(0.1,v2_2,1.00) - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] @@ -215,7 +209,6 @@ test_that("3 element test Mixture StartPoint", { v1 <- c(v1_1,0.5,1.00) x2 <- c(1,2,3) v2 <- c(v2_1,0.5,1.00) - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] @@ -238,7 +231,6 @@ test_that("3 element test Mixture StartLoc", { v1 <- c(0.25,0.5,1.00) x2 <- c(x2_1,2,4) v2 <- c(0.3,0.5,1.00) - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.000000001)[[1]] @@ -269,7 +261,6 @@ test_that("many element test Mixture ", { w2 <- min(diff(x2))/y123 v2 <- cumsum(abs(rnorm(j))) v2 = v2/v2[length(v2)] - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) top1 = max(x2[length(x2)],x1[length(x1)])+max(w1,w2) bottom1 = min(x2[1],x1[1]) @@ -299,7 +290,6 @@ test_that("3 element test w1=0.1, w2=0.2", { v1 <- c(0.25,0.70,1.00) x2 <- c(1,2,3) v2 <- c(0.25,0.70,1.00) - print(x1) f1 <- makeFunction(x1,v1,w1,x2,v2,w2) res2 <- integrate(f1,0,max(x2[3],x1[3])+max(w1,w2),abs.tol=0.0000000001)[[1]] res1 <- NetEmdSmoothV2(x1,v1,w1,x2,v2,w2) From 6f7a515955db3a929ddb718437aa1b6c95d1e31a Mon Sep 17 00:00:00 2001 From: ande Date: Tue, 26 Nov 2019 12:21:13 +0000 Subject: [PATCH 29/84] updated the code --- R/emd.R | 24 +++++++---------------- src/fastSmoothV2.cpp | 45 ++++++++++++++++++++++---------------------- 2 files changed, 29 insertions(+), 40 deletions(-) diff --git a/R/emd.R b/R/emd.R index 3d486c81..99f6ec8d 100644 --- a/R/emd.R +++ b/R/emd.R @@ -91,27 +91,16 @@ 
min_emd_optimise_fast <- function(dhist1, dhist2) { binWidth1=dhist1$smoothing_window_width binWidth2=dhist2$smoothing_window_width count=0 + # Offset the histograms to make the alignments work + loc1Mod <- loc1 - binWidth1/2 + loc2Mod <- loc2 - binWidth2/2 # Determine minimum and maximum offset of range in which histograms overlap # (based on sliding histogram 1) - min_offset <- min(dhist2$locations) - max(dhist1$locations) - max(binWidth1,binWidth2) - max_offset <- max(dhist2$locations) - min(dhist1$locations) + max(binWidth1,binWidth2) + min_offset <- min(loc2Mod) - max(loc1Mod) - max(binWidth1,binWidth2) + max_offset <- max(loc2Mod) - min(loc1Mod) + max(binWidth1,binWidth2) # Set lower and upper range for optimise algorithm to be somewhat wider than # range defined by the minimum and maximum offset. This guards against a - # couple of issues that arise if the optimise range is exactly min_offset - # to max_offset - # 1) If lower and upper are equal, the optimise method will throw and error - # 2) It seems that optimise is not guaranteed to explore its lower and upper - # bounds, even in the case where one of them is the offset with minimum - # EMD - buffer <- 0.1 - min_offset <- min_offset - buffer - max_offset <- max_offset + buffer - - # Define a single parameter function to minimise emd as a function of offset - emd_offset <- function(offset) { - temp1<- NetEmdSmoothV2(loc1+offset,val1,binWidth1,loc2,val2,binWidth2) - temp1 - } + # couple of issues that arise if the op # Get solution from optimiser soln <- stats::optimise(emd_offset, lower = min_offset, upper = max_offset, tol = .Machine$double.eps*1000) @@ -165,6 +154,7 @@ min_emd_optimise <- function(dhist1, dhist2) { soln <- stats::optimise(emd_offset, lower = min_offset, upper = max_offset, tol = .Machine$double.eps*1000) + browser() # Return mnimum EMD and associated offset min_emd <- soln$objective min_offset <- soln$minimum diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 
b87cf747..07491846 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -130,7 +130,7 @@ double get_double_segment_constrained(double seg1Loc1, double seg1Loc2, double s // This could be easily special cased (saving ~1 if statements ). res += get_segment_constrained(seg1Loc2, seg1Loc3, seg2Loc1, seg2Loc2, seg1Val2, seg1Val2, seg2Val1, seg2Val2); - // compare the flat section with the linear section + // compare the flat section with the flat section // This could be easily special cased (saving ~2 if statements ). res += get_segment_constrained(seg1Loc2, seg1Loc3, seg2Loc2, seg2Loc3, seg1Val2, seg1Val2, seg2Val2, seg2Val2); return res; @@ -157,28 +157,28 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num double res=0; // Hist 1 - double curSeg1Loc1; - double curSeg1Loc2; - double curSeg1Loc3; - double curSeg1Val1; - double curSeg1Val2; + double curSeg1Loc1; // Start of the gradient section in Seg1 + double curSeg1Loc2; // End of the gradient section in Seg1 + double curSeg1Loc3; // End of the flat section in Seg1 + double curSeg1Val1; // Start value in Seg1 + double curSeg1Val2; // End value in Seg1 - // Hist 1 - double curSeg2Loc1; - double curSeg2Loc2; - double curSeg2Loc3; - double curSeg2Val1; - double curSeg2Val2; + // Hist 2 + double curSeg2Loc1; // Start of the gradient section in Seg2 + double curSeg2Loc2; // End of the gradient section in Seg2 + double curSeg2Loc3; // End of the flat section in Seg2 + double curSeg2Val1; // Start value in Seg2 + double curSeg2Val2; // End value in Seg2 - double tempDouble; - // need to iterate through regions of constant gradient + // Starting index for the second histogram double secondStart=0; - + + // Smallest and largest location values double maxLoc = std::max(loc1[loc1.size()-1] +binWidth1,loc2[loc2.size()-1]+binWidth2 ); double minLoc = std::min(loc1[0],loc2[0]); - // warning index1==-1 case is not well tested + // warning area before loc2[0] is not well tested // As 
values outside of the range appear to be zero // Dealing with segments which are to the left of the region covered by both @@ -189,10 +189,11 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num // Fix the position of Seg1 and then interate over Seg2 until we have all // of the segments of Seg2 before Seg1 starts. curSeg1Loc1=minLoc; - curSeg1Loc2=loc1[0]; + curSeg1Loc2=minLoc; curSeg1Loc3=loc1[0]; curSeg1Val1=0; curSeg1Val2=0; + // Set this value so we can update in the loop curSeg2Val2=0; for (index2=0;index2curSeg2Loc3) {break;} } @@ -217,10 +216,11 @@ double NetEmdSmoothV2(NumericVector loc1,NumericVector val1,double binWidth1,Num // Fix the position of Seg2 and then interate over Seg1 until we have all // of the segments of Seg1 before Seg2 starts. curSeg2Loc1=minLoc; - curSeg2Loc2=loc2[0]; + curSeg2Loc2=minLoc; curSeg2Loc3=loc2[0]; curSeg2Val1=0; curSeg2Val2=0; + // Set this value so we can update in the lopp curSeg1Val2=0; for (index1=0;index1curSeg1Loc3) + if (curSeg2Loc3>curSeg1Loc3) {break;} } } From 8000fa1fc6afda32899f4978f0725c693e7f9c79 Mon Sep 17 00:00:00 2001 From: ande Date: Tue, 26 Nov 2019 13:28:18 +0000 Subject: [PATCH 30/84] replace missing file --- R/emd.R | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/R/emd.R b/R/emd.R index 99f6ec8d..e9eee6f3 100644 --- a/R/emd.R +++ b/R/emd.R @@ -100,7 +100,20 @@ min_emd_optimise_fast <- function(dhist1, dhist2) { max_offset <- max(loc2Mod) - min(loc1Mod) + max(binWidth1,binWidth2) # Set lower and upper range for optimise algorithm to be somewhat wider than # range defined by the minimum and maximum offset. 
This guards against a - # couple of issues that arise if the op + # couple of issues that arise if the optimise range is exactly min_offset + # to max_offset + # 1) If lower and upper are equal, the optimise method will throw and error + # 2) It seems that optimise is not guaranteed to explore its lower and upper + # bounds, even in the case where one of them is the offset with minimum + # EMD + buffer <- 0.1 + min_offset <- min_offset - buffer + max_offset <- max_offset + buffer + # Define a single parameter function to minimise emd as a function of offset + emd_offset <- function(offset) { + temp1<- NetEmdSmoothV2(loc1Mod+offset,val1,binWidth1,loc2Mod,val2,binWidth2) + temp1 + } # Get solution from optimiser soln <- stats::optimise(emd_offset, lower = min_offset, upper = max_offset, tol = .Machine$double.eps*1000) From 1f5d9d7c57f069ebe65dfd46110c7a5c4b041240 Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Fri, 17 Apr 2020 07:19:21 +0100 Subject: [PATCH 31/84] Formatting and whitespace --- src/fastSmoothV2.cpp | 441 +++++++++++++++++++++++-------------------- 1 file changed, 235 insertions(+), 206 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 07491846..242a6b8a 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -5,141 +5,149 @@ #include #include #include + using namespace Rcpp; //compute segment -inline double get_segment(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) +inline double get_segment(double start, double end, double val1_start, + double val1_end, double val2_start, double val2_end) { - double length; - length = end - start; + const double length = end - start; + double topTriangle; double topRectangle; double bottomTriangle; double midPoint; double midValue; - double res=0; - if (val1_start > val2_start) - { - if (val1_end >= val2_end) - { + double res = 0; + + if (val1_start > val2_start) { + if (val1_end >= val2_end) { // They are in the same order no 
bowtie - // seg1 is above seg2 + // seg1 is above seg2 // triangle of seg1 topTriangle = 0.5*length*(val1_end-val1_start); // rectangle between seg1 and seg2 topRectangle = length*(val1_start-val2_start); // triangle of seg2 (to be removed) - bottomTriangle = 0.5*length*(val2_end-val2_start); + bottomTriangle = 0.5*length*(val2_end-val2_start); return topTriangle+topRectangle-bottomTriangle; } - else - { - //bowtie - // lets make this really simple as the compiler - // will combine the expressions as needed - midPoint = (val1_start-val2_start)/((val2_end-val2_start) - (val1_end-val1_start)); - midValue = val1_start + midPoint*(val1_end-val1_start); - midPoint = midPoint*length; - - topTriangle = 0.5*midPoint*(midValue-val1_start); - topRectangle = midPoint*(val1_start-val2_start); - bottomTriangle = 0.5*midPoint*(midValue-val2_start); - - res = topTriangle+topRectangle-bottomTriangle; - - topTriangle = 0.5*(length-midPoint)*(val2_end-midValue); - topRectangle = 0; // midPoint*(val1_start-val2_start); - bottomTriangle = 0.5*(length - midPoint)*(val1_end - midValue); - res += topTriangle+topRectangle-bottomTriangle; - return res; + else { + //bowtie + // lets make this really simple as the compiler + // will combine the expressions as needed + midPoint = (val1_start - val2_start) / + ((val2_end - val2_start) - (val1_end - val1_start)); + + midValue = val1_start + midPoint * (val1_end - val1_start); + + midPoint = midPoint * length; + + topTriangle = 0.5 * midPoint * (midValue - val1_start); + topRectangle = midPoint * (val1_start - val2_start); + bottomTriangle = 0.5 * midPoint * (midValue - val2_start); + + res = topTriangle + topRectangle - bottomTriangle; + + topTriangle = 0.5 * (length - midPoint) * (val2_end - midValue); + topRectangle = 0; // midPoint*(val1_start-val2_start); + bottomTriangle = 0.5 * (length - midPoint) * (val1_end - midValue); + res += topTriangle + topRectangle - bottomTriangle; + return res; } } - else - { - if (val1_end > val2_end) - { - 
//bowtie - // Find the point where they cross. - // (Solution of linear equations) - midPoint = (val2_start-val1_start)/((val1_end-val1_start) - (val2_end-val2_start)); - midValue = val2_start + midPoint*(val2_end-val2_start); - midPoint = midPoint*length; - - topTriangle = 0.5*midPoint*(midValue-val2_start); - topRectangle = midPoint*(val2_start-val1_start); - bottomTriangle = 0.5*midPoint*(midValue-val1_start); - - res = topTriangle+topRectangle-bottomTriangle; - - topTriangle = 0.5*(length-midPoint)*(val1_end-midValue); - topRectangle = 0; // midPoint*(val1_start-val2_start); - bottomTriangle = 0.5*(length - midPoint)*(val2_end - midValue); - res += topTriangle+topRectangle-bottomTriangle; - return res; - - } - else // same order - { - // seg2 is above seg1 - // Triangle seg2 above seg1 - topTriangle = 0.5*length*(val2_end-val2_start); - // rectangle between seg2 and seg1 - topRectangle = length*(val2_start-val1_start); - // Seg1 triangle to be removed - bottomTriangle = 0.5*length*(val1_end-val1_start); - return topTriangle+topRectangle-bottomTriangle; - } + else { + if (val1_end > val2_end) { + //bowtie + // Find the point where they cross. 
+ // (Solution of linear equations) + midPoint = (val2_start - val1_start) / + ((val1_end - val1_start) - (val2_end - val2_start)); + + midValue = val2_start + midPoint * (val2_end - val2_start); + midPoint = midPoint * length; + + topTriangle = 0.5 * midPoint * (midValue - val2_start); + topRectangle = midPoint * (val2_start - val1_start); + bottomTriangle = 0.5 * midPoint * (midValue - val1_start); + + res = topTriangle + topRectangle - bottomTriangle; + + topTriangle = 0.5 * (length - midPoint) * (val1_end - midValue); + topRectangle = 0; // midPoint*(val1_start-val2_start); + bottomTriangle = 0.5 * (length - midPoint) * (val2_end - midValue); + res += topTriangle + topRectangle - bottomTriangle; + return res; } + else { // same order + // seg2 is above seg1 + // Triangle seg2 above seg1 + topTriangle = 0.5 * length * (val2_end - val2_start); + // rectangle between seg2 and seg1 + topRectangle = length * (val2_start - val1_start); + // Seg1 triangle to be removed + bottomTriangle = 0.5 * length * (val1_end - val1_start); + return topTriangle + topRectangle - bottomTriangle; + } + } } // cut down and compute segment -inline double get_segment_constrained(double seg1L1, double seg1L2, double seg2L1, double seg2L2, double seg1V1, double seg1V2, double seg2V1, double seg2V2) +inline double get_segment_constrained(double seg1L1, double seg1L2, + double seg2L1, double seg2L2, + double seg1V1, double seg1V2, + double seg2V1, double seg2V2) { - //We have a valid range - double valStart1, valEnd1, valStart2, valEnd2; - double start,end; - start = std::max(seg1L1,seg2L1); - end = std::min(seg1L2,seg2L2); - if (startcurSeg2Loc3) - {break;} + if (loc2[0] < loc1[0]) { + // loc2 starts before loc1 so lets deal with those segments first + // Fix the position of Seg1 and then interate over Seg2 until we have all + // of the segments of Seg2 before Seg1 starts. 
+ curSeg1Loc1 = minLoc; + curSeg1Loc2 = minLoc; + curSeg1Loc3 = loc1[0]; + curSeg1Val1 = 0; + curSeg1Val2 = 0; + + // Set this value so we can update in the loop + curSeg2Val2 = 0; + for (index2 = 0; index2 < loc2.size(); index2++) { + curSeg2Loc1 = loc2[index2]; + curSeg2Loc2 = loc2[index2] + binWidth2; + if (index2 == loc2.size() - 1) { + curSeg2Loc3 = maxLoc; + } + else { + curSeg2Loc3 = loc2[index2 + 1]; } + curSeg2Val1 = curSeg2Val2; + curSeg2Val2 = val2[index2]; + res += get_double_segment_constrained( + curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, + curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); + + if (curSeg1Loc1 > curSeg2Loc3) { + break; + } + } } else { - // loc2 starts before loc1 so lets deal with those segments first - // Fix the position of Seg2 and then interate over Seg1 until we have all - // of the segments of Seg1 before Seg2 starts. - curSeg2Loc1=minLoc; - curSeg2Loc2=minLoc; - curSeg2Loc3=loc2[0]; - curSeg2Val1=0; - curSeg2Val2=0; - // Set this value so we can update in the lopp - curSeg1Val2=0; - for (index1=0;index1curSeg1Loc3) - {break;} + // loc2 starts before loc1 so lets deal with those segments first + // Fix the position of Seg2 and then interate over Seg1 until we have all + // of the segments of Seg1 before Seg2 starts. 
+ curSeg2Loc1 = minLoc; + curSeg2Loc2 = minLoc; + curSeg2Loc3 = loc2[0]; + curSeg2Val1 = 0; + curSeg2Val2 = 0; + + // Set this value so we can update in the lopp + curSeg1Val2 = 0; + for (index1 = 0; index1 < loc1.size(); index1++) { + curSeg1Loc1 = loc1[index1]; + curSeg1Loc2 = loc1[index1] + binWidth1; + if (index1 == loc1.size() - 1) { + curSeg1Loc3 = maxLoc; } + else { + curSeg1Loc3 = loc1[index1 + 1]; + } + curSeg1Val1 = curSeg1Val2; + curSeg1Val2 = val1[index1]; + res += get_double_segment_constrained( + curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, + curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); + if (curSeg2Loc1 > curSeg1Loc3) { + break; + } + } } + // Add both the overlapping sections and the non overlapping section on the right // Note we reiterate over the first few sections loc1 // Could store where we are upto from above to save time // Reset Val counter - curSeg1Val2=0; - for (index1=0;index1curSeg1Loc3) - {break;} - } + curSeg1Val2 = 0; + for (index1 = 0; index1 < loc1.size(); index1++) { + // Get the three relevant locations + // Start; end of linear section; end of flat section + curSeg1Loc1 = loc1[index1]; + curSeg1Loc2 = loc1[index1] + binWidth1; + // could pull this check outside of the loop with final case not sure if worth it + if (index1 == loc1.size() - 1) { + curSeg1Loc3 = maxLoc; + } + else { + curSeg1Loc3 = loc1[index1 + 1]; + } + // Update value to the start and end of the current section + curSeg1Val1 = curSeg1Val2; + curSeg1Val2 = val1[index1]; + // Setting up the previous value for the next loop + // Could replace this loop with a while + // but if so would need to be careful about overlaps + if (secondStart == 0) { + curSeg2Val2 = 0; + } + else { + curSeg2Val2 = val2[secondStart - 1]; + } + for (index2 = secondStart; index2 < loc2.size(); index2++) { + // Construct 3 sections for second seg. 
+ curSeg2Loc1=loc2[index2]; + curSeg2Loc2=loc2[index2] + binWidth2; + if (index2 == loc2.size() - 1) { + curSeg2Loc3 = maxLoc; + } + else { + curSeg2Loc3 = loc2[index2 + 1]; + } + //update values + curSeg2Val1 = curSeg2Val2; + curSeg2Val2 = val2[index2]; + // If this section is behind Seg1 + // Do not consider again + if (curSeg2Loc3 < curSeg1Loc1) { + secondStart = index2 + 1; + continue; + } + // If current Seg2 is beyond Seg1 break out of loop + res += get_double_segment_constrained( + curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, + curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); + + if (curSeg2Loc3 > curSeg1Loc3) { + break; + } + } } return res; } From 91c9561da39de69e64c7e9a755d5e76e63242abc Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Fri, 17 Apr 2020 08:09:49 +0100 Subject: [PATCH 32/84] Formatting --- src/fastSmoothV2.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 242a6b8a..d222256d 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -26,12 +26,12 @@ inline double get_segment(double start, double end, double val1_start, // They are in the same order no bowtie // seg1 is above seg2 // triangle of seg1 - topTriangle = 0.5*length*(val1_end-val1_start); + topTriangle = 0.5*length * (val1_end - val1_start); // rectangle between seg1 and seg2 - topRectangle = length*(val1_start-val2_start); + topRectangle = length * (val1_start - val2_start); // triangle of seg2 (to be removed) - bottomTriangle = 0.5*length*(val2_end-val2_start); - return topTriangle+topRectangle-bottomTriangle; + bottomTriangle = 0.5 * length * (val2_end - val2_start); + return topTriangle + topRectangle - bottomTriangle; } else { //bowtie From 9151e520eb2eceb39ae1e6e84f6ebf500c2bd2c0 Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Fri, 17 Apr 2020 09:20:10 +0100 Subject: [PATCH 33/84] Local for-loop index --- src/fastSmoothV2.cpp | 52 
+++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index d222256d..33a76c44 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -161,8 +161,6 @@ double get_double_segment_constrained( double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2) { - int index1, index2; - double res = 0; // Hist 1 @@ -206,17 +204,17 @@ double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, // Set this value so we can update in the loop curSeg2Val2 = 0; - for (index2 = 0; index2 < loc2.size(); index2++) { - curSeg2Loc1 = loc2[index2]; - curSeg2Loc2 = loc2[index2] + binWidth2; - if (index2 == loc2.size() - 1) { + for (int index = 0; index < loc2.size(); index++) { + curSeg2Loc1 = loc2[index]; + curSeg2Loc2 = loc2[index] + binWidth2; + if (index == loc2.size() - 1) { curSeg2Loc3 = maxLoc; } else { - curSeg2Loc3 = loc2[index2 + 1]; + curSeg2Loc3 = loc2[index + 1]; } curSeg2Val1 = curSeg2Val2; - curSeg2Val2 = val2[index2]; + curSeg2Val2 = val2[index]; res += get_double_segment_constrained( curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); @@ -239,17 +237,17 @@ double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, // Set this value so we can update in the lopp curSeg1Val2 = 0; - for (index1 = 0; index1 < loc1.size(); index1++) { - curSeg1Loc1 = loc1[index1]; - curSeg1Loc2 = loc1[index1] + binWidth1; - if (index1 == loc1.size() - 1) { + for (int index = 0; index < loc1.size(); index++) { + curSeg1Loc1 = loc1[index]; + curSeg1Loc2 = loc1[index] + binWidth1; + if (index == loc1.size() - 1) { curSeg1Loc3 = maxLoc; } else { - curSeg1Loc3 = loc1[index1 + 1]; + curSeg1Loc3 = loc1[index + 1]; } curSeg1Val1 = curSeg1Val2; - curSeg1Val2 = val1[index1]; + curSeg1Val2 = val1[index]; 
res += get_double_segment_constrained( curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); @@ -264,21 +262,21 @@ double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, // Could store where we are upto from above to save time // Reset Val counter curSeg1Val2 = 0; - for (index1 = 0; index1 < loc1.size(); index1++) { + for (int index = 0; index < loc1.size(); index++) { // Get the three relevant locations // Start; end of linear section; end of flat section - curSeg1Loc1 = loc1[index1]; - curSeg1Loc2 = loc1[index1] + binWidth1; + curSeg1Loc1 = loc1[index]; + curSeg1Loc2 = loc1[index] + binWidth1; // could pull this check outside of the loop with final case not sure if worth it - if (index1 == loc1.size() - 1) { + if (index == loc1.size() - 1) { curSeg1Loc3 = maxLoc; } else { - curSeg1Loc3 = loc1[index1 + 1]; + curSeg1Loc3 = loc1[index + 1]; } // Update value to the start and end of the current section curSeg1Val1 = curSeg1Val2; - curSeg1Val2 = val1[index1]; + curSeg1Val2 = val1[index]; // Setting up the previous value for the next loop // Could replace this loop with a while // but if so would need to be careful about overlaps @@ -288,23 +286,23 @@ double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, else { curSeg2Val2 = val2[secondStart - 1]; } - for (index2 = secondStart; index2 < loc2.size(); index2++) { + for (int index = secondStart; index < loc2.size(); index++) { // Construct 3 sections for second seg. 
- curSeg2Loc1=loc2[index2]; - curSeg2Loc2=loc2[index2] + binWidth2; - if (index2 == loc2.size() - 1) { + curSeg2Loc1 = loc2[index]; + curSeg2Loc2 = loc2[index] + binWidth2; + if (index == loc2.size() - 1) { curSeg2Loc3 = maxLoc; } else { - curSeg2Loc3 = loc2[index2 + 1]; + curSeg2Loc3 = loc2[index + 1]; } //update values curSeg2Val1 = curSeg2Val2; - curSeg2Val2 = val2[index2]; + curSeg2Val2 = val2[index]; // If this section is behind Seg1 // Do not consider again if (curSeg2Loc3 < curSeg1Loc1) { - secondStart = index2 + 1; + secondStart = index + 1; continue; } // If current Seg2 is beyond Seg1 break out of loop From b35563eb90e4dfe050e4970d3eb5fcdca1d6121c Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Fri, 17 Apr 2020 10:24:46 +0100 Subject: [PATCH 34/84] Combine some conditional cases in get_segment --- src/fastSmoothV2.cpp | 119 ++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 69 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 33a76c44..8955f429 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -8,7 +8,36 @@ using namespace Rcpp; -//compute segment +int signum(double val) { + return (0.0 < val) - (val < 0.0); +} + +inline double bowtie_area(double length, double val1_start, double val1_end, + double val2_start, double val2_end) +{ + double midPoint = (val1_start - val2_start) / + ((val2_end - val2_start) - (val1_end - val1_start)); + + const double midValue = val1_start + midPoint * (val1_end - val1_start); + + midPoint = midPoint * length; + + double topTriangle = 0.5 * midPoint * (midValue - val1_start); + double topRectangle = midPoint * (val1_start - val2_start); + double bottomTriangle = 0.5 * midPoint * (midValue - val2_start); + + double res = topTriangle + topRectangle - bottomTriangle; + + topTriangle = 0.5 * (length - midPoint) * (val2_end - midValue); + topRectangle = 0; // midPoint*(val1_start-val2_start); + bottomTriangle = 0.5 * (length - midPoint) * (val1_end - 
midValue); + + res += topTriangle + topRectangle - bottomTriangle; + return res; +} + +// Compute the unsigned area between two line segments +// assumes that val1_end > val1_start and val2_end > val2_start inline double get_segment(double start, double end, double val1_start, double val1_end, double val2_start, double val2_end) { @@ -21,75 +50,27 @@ inline double get_segment(double start, double end, double val1_start, double midValue; double res = 0; - if (val1_start > val2_start) { - if (val1_end >= val2_end) { - // They are in the same order no bowtie - // seg1 is above seg2 - // triangle of seg1 - topTriangle = 0.5*length * (val1_end - val1_start); - // rectangle between seg1 and seg2 - topRectangle = length * (val1_start - val2_start); - // triangle of seg2 (to be removed) - bottomTriangle = 0.5 * length * (val2_end - val2_start); - return topTriangle + topRectangle - bottomTriangle; - } - else { - //bowtie - // lets make this really simple as the compiler - // will combine the expressions as needed - midPoint = (val1_start - val2_start) / - ((val2_end - val2_start) - (val1_end - val1_start)); - - midValue = val1_start + midPoint * (val1_end - val1_start); - - midPoint = midPoint * length; - - topTriangle = 0.5 * midPoint * (midValue - val1_start); - topRectangle = midPoint * (val1_start - val2_start); - bottomTriangle = 0.5 * midPoint * (midValue - val2_start); - - res = topTriangle + topRectangle - bottomTriangle; - - topTriangle = 0.5 * (length - midPoint) * (val2_end - midValue); - topRectangle = 0; // midPoint*(val1_start-val2_start); - bottomTriangle = 0.5 * (length - midPoint) * (val1_end - midValue); - res += topTriangle + topRectangle - bottomTriangle; - return res; - } + bool both_differences_positive = val1_start > val2_start && val1_end >= val2_end; + bool both_differences_negative = val1_start <= val2_start && val1_end <= val2_end; + + if (both_differences_positive || both_differences_negative) + { + // They are in the same order: no bowtie + // 
triangle of seg1 + topTriangle = 0.5 * length * (val1_end - val1_start); + // rectangle between seg1 and seg2 + topRectangle = length * (val1_start - val2_start); + // triangle of seg2 (to be removed) + bottomTriangle = 0.5 * length * (val2_end - val2_start); + + const double sign = both_differences_positive?1.0:-1.0; + return sign * (topTriangle + topRectangle - bottomTriangle); } - else { - if (val1_end > val2_end) { - //bowtie - // Find the point where they cross. - // (Solution of linear equations) - midPoint = (val2_start - val1_start) / - ((val1_end - val1_start) - (val2_end - val2_start)); - - midValue = val2_start + midPoint * (val2_end - val2_start); - midPoint = midPoint * length; - - topTriangle = 0.5 * midPoint * (midValue - val2_start); - topRectangle = midPoint * (val2_start - val1_start); - bottomTriangle = 0.5 * midPoint * (midValue - val1_start); - - res = topTriangle + topRectangle - bottomTriangle; - - topTriangle = 0.5 * (length - midPoint) * (val1_end - midValue); - topRectangle = 0; // midPoint*(val1_start-val2_start); - bottomTriangle = 0.5 * (length - midPoint) * (val2_end - midValue); - res += topTriangle + topRectangle - bottomTriangle; - return res; - } - else { // same order - // seg2 is above seg1 - // Triangle seg2 above seg1 - topTriangle = 0.5 * length * (val2_end - val2_start); - // rectangle between seg2 and seg1 - topRectangle = length * (val2_start - val1_start); - // Seg1 triangle to be removed - bottomTriangle = 0.5 * length * (val1_end - val1_start); - return topTriangle + topRectangle - bottomTriangle; - } + else if (val1_start > val2_start) { // bowtie, first case + return bowtie_area(length, val1_start, val1_end, val2_start, val2_end); + } + else { // bowtie, second case + return bowtie_area(length, val2_start, val2_end, val1_start, val1_end); } } From 0ca7c893be926a1365d145e3cbec67e8629ef35e Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Fri, 17 Apr 2020 10:59:53 +0100 Subject: [PATCH 35/84] Combine cases for 
handling the leftmost segments (NetEmdSmoothV2) --- src/fastSmoothV2.cpp | 115 ++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 62 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 8955f429..604ea673 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -129,6 +129,57 @@ double get_double_segment_constrained( } +// Dealing with segments which are to the left of the region covered by both +// segs +inline double leftmost_segments(const NumericVector& loc1, + const NumericVector& loc2, + const NumericVector& val1, + const NumericVector& val2, + double binWidth1, + double maxLoc) +{ + double res = 0.0; + + assert(loc1[0] < loc2[0]); + + // Fix the position of Seg2 and then interate over Seg1 until we have all + // of the segments of Seg1 before Seg2 starts. + + // are these all used? + + double curSeg2Loc1 = loc1[0]; + double curSeg2Loc2 = loc1[0]; + double curSeg2Loc3 = loc2[0]; + double curSeg2Val1 = 0; + double curSeg2Val2 = 0; + + // Set this value so we can update in the lopp + double curSeg1Val2 = 0; + for (int index = 0; index < loc1.size(); index++) { + double curSeg1Loc1 = loc1[index]; + double curSeg1Loc2 = loc1[index] + binWidth1; + double curSeg1Loc3; + if (index == loc1.size() - 1) { + curSeg1Loc3 = maxLoc; + } + else { + curSeg1Loc3 = loc1[index + 1]; + } + double curSeg1Val1 = curSeg1Val2; + curSeg1Val2 = val1[index]; + res += get_double_segment_constrained( + curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, + curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); + + if (curSeg2Loc1 > curSeg1Loc3) { + break; + } + } + + return res; +} + + //' @title //' Compute EMD ////' @@ -166,76 +217,16 @@ double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, double maxLoc = std::max(loc1[loc1.size()-1] + binWidth1, loc2[loc2.size()-1] + binWidth2); - double minLoc = std::min(loc1[0], loc2[0]); // warning area before loc2[0] is not well tested // As 
values outside of the range appear to be zero - // Dealing with segments which are to the left of the region covered by both - // segs if (loc2[0] < loc1[0]) { - // loc2 starts before loc1 so lets deal with those segments first - // Fix the position of Seg1 and then interate over Seg2 until we have all - // of the segments of Seg2 before Seg1 starts. - curSeg1Loc1 = minLoc; - curSeg1Loc2 = minLoc; - curSeg1Loc3 = loc1[0]; - curSeg1Val1 = 0; - curSeg1Val2 = 0; - - // Set this value so we can update in the loop - curSeg2Val2 = 0; - for (int index = 0; index < loc2.size(); index++) { - curSeg2Loc1 = loc2[index]; - curSeg2Loc2 = loc2[index] + binWidth2; - if (index == loc2.size() - 1) { - curSeg2Loc3 = maxLoc; - } - else { - curSeg2Loc3 = loc2[index + 1]; - } - curSeg2Val1 = curSeg2Val2; - curSeg2Val2 = val2[index]; - res += get_double_segment_constrained( - curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, - curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); - - if (curSeg1Loc1 > curSeg2Loc3) { - break; - } - } + res += leftmost_segments(loc2, loc1, val2, val1, binWidth2, maxLoc); } else { - // loc2 starts before loc1 so lets deal with those segments first - // Fix the position of Seg2 and then interate over Seg1 until we have all - // of the segments of Seg1 before Seg2 starts. 
- curSeg2Loc1 = minLoc; - curSeg2Loc2 = minLoc; - curSeg2Loc3 = loc2[0]; - curSeg2Val1 = 0; - curSeg2Val2 = 0; - - // Set this value so we can update in the lopp - curSeg1Val2 = 0; - for (int index = 0; index < loc1.size(); index++) { - curSeg1Loc1 = loc1[index]; - curSeg1Loc2 = loc1[index] + binWidth1; - if (index == loc1.size() - 1) { - curSeg1Loc3 = maxLoc; - } - else { - curSeg1Loc3 = loc1[index + 1]; - } - curSeg1Val1 = curSeg1Val2; - curSeg1Val2 = val1[index]; - res += get_double_segment_constrained( - curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, - curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); - if (curSeg2Loc1 > curSeg1Loc3) { - break; - } - } + res += leftmost_segments(loc1, loc2, val1, val2, binWidth1, maxLoc); } // Add both the overlapping sections and the non overlapping section on the right From c36c3f66d1e5f7fed614ca23dacc95d02810a87e Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Fri, 17 Apr 2020 15:39:04 +0100 Subject: [PATCH 36/84] Remove unused function --- src/fastSmoothV2.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 604ea673..0fa5f39d 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -8,10 +8,6 @@ using namespace Rcpp; -int signum(double val) { - return (0.0 < val) - (val < 0.0); -} - inline double bowtie_area(double length, double val1_start, double val1_end, double val2_start, double val2_end) { From 4af238d7c9a6280f907758ebeabd9596d35ba63e Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Fri, 17 Apr 2020 15:50:06 +0100 Subject: [PATCH 37/84] Rename loop counters --- src/fastSmoothV2.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 0fa5f39d..3598c0f5 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -230,21 +230,21 @@ double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, // Could 
store where we are upto from above to save time // Reset Val counter curSeg1Val2 = 0; - for (int index = 0; index < loc1.size(); index++) { + for (int index1 = 0; index1 < loc1.size(); index1++) { // Get the three relevant locations // Start; end of linear section; end of flat section - curSeg1Loc1 = loc1[index]; - curSeg1Loc2 = loc1[index] + binWidth1; + curSeg1Loc1 = loc1[index1]; + curSeg1Loc2 = loc1[index1] + binWidth1; // could pull this check outside of the loop with final case not sure if worth it - if (index == loc1.size() - 1) { + if (index1 == loc1.size() - 1) { curSeg1Loc3 = maxLoc; } else { - curSeg1Loc3 = loc1[index + 1]; + curSeg1Loc3 = loc1[index1 + 1]; } // Update value to the start and end of the current section curSeg1Val1 = curSeg1Val2; - curSeg1Val2 = val1[index]; + curSeg1Val2 = val1[index1]; // Setting up the previous value for the next loop // Could replace this loop with a while // but if so would need to be careful about overlaps @@ -254,23 +254,23 @@ double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, else { curSeg2Val2 = val2[secondStart - 1]; } - for (int index = secondStart; index < loc2.size(); index++) { + for (int index2 = secondStart; index2 < loc2.size(); index2++) { // Construct 3 sections for second seg. 
- curSeg2Loc1 = loc2[index]; - curSeg2Loc2 = loc2[index] + binWidth2; - if (index == loc2.size() - 1) { + curSeg2Loc1 = loc2[index2]; + curSeg2Loc2 = loc2[index2] + binWidth2; + if (index2 == loc2.size() - 1) { curSeg2Loc3 = maxLoc; } else { - curSeg2Loc3 = loc2[index + 1]; + curSeg2Loc3 = loc2[index2 + 1]; } //update values curSeg2Val1 = curSeg2Val2; - curSeg2Val2 = val2[index]; + curSeg2Val2 = val2[index2]; // If this section is behind Seg1 // Do not consider again if (curSeg2Loc3 < curSeg1Loc1) { - secondStart = index + 1; + secondStart = index2 + 1; continue; } // If current Seg2 is beyond Seg1 break out of loop From 3feb0ae0290de2bf6164bcd81fe31c29d2f2cfa7 Mon Sep 17 00:00:00 2001 From: ande Date: Fri, 17 Apr 2020 18:19:31 +0100 Subject: [PATCH 38/84] added tests --- src/RcppExports.cpp | 19 +++- src/fastSmooth.cpp | 57 +++++++--- src/fastSmoothV2.cpp | 8 +- src/test_emd_fast_smooth.cpp | 202 +++++++++++++++++++++++++++++++++++ 4 files changed, 265 insertions(+), 21 deletions(-) create mode 100644 src/test_emd_fast_smooth.cpp diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 99086611..933f6b71 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -62,6 +62,22 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// NetEmdSmoothV2_old +double NetEmdSmoothV2_old(NumericVector loc1, NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2); +RcppExport SEXP _netdist_NetEmdSmoothV2_old(SEXP loc1SEXP, SEXP val1SEXP, SEXP binWidth1SEXP, SEXP loc2SEXP, SEXP val2SEXP, SEXP binWidth2SEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type loc1(loc1SEXP); + Rcpp::traits::input_parameter< NumericVector >::type val1(val1SEXP); + Rcpp::traits::input_parameter< double >::type binWidth1(binWidth1SEXP); + Rcpp::traits::input_parameter< NumericVector >::type loc2(loc2SEXP); + Rcpp::traits::input_parameter< NumericVector >::type 
val2(val2SEXP); + Rcpp::traits::input_parameter< double >::type binWidth2(binWidth2SEXP); + rcpp_result_gen = Rcpp::wrap(NetEmdSmoothV2_old(loc1, val1, binWidth1, loc2, val2, binWidth2)); + return rcpp_result_gen; +END_RCPP +} RcppExport SEXP run_testthat_tests(); @@ -70,7 +86,8 @@ static const R_CallMethodDef CallEntries[] = { {"_netdist_emd_fast_no_smoothing", (DL_FUNC) &_netdist_emd_fast_no_smoothing, 4}, {"_netdist_NetEmdSmooth", (DL_FUNC) &_netdist_NetEmdSmooth, 6}, {"_netdist_NetEmdSmoothV2", (DL_FUNC) &_netdist_NetEmdSmoothV2, 6}, - {"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 0}, + {"_netdist_NetEmdSmoothV2_old", (DL_FUNC) &_netdist_NetEmdSmoothV2_old, 6}, + {"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 0}, {NULL, NULL, 0} }; diff --git a/src/fastSmooth.cpp b/src/fastSmooth.cpp index b2d43847..c927547b 100644 --- a/src/fastSmooth.cpp +++ b/src/fastSmooth.cpp @@ -59,7 +59,31 @@ double NetEmdSmooth(NumericVector loc1,NumericVector val1,double binWidth1,Numer res=0; // set as 0 as at bottom of hist curStartVal=0; - + + std::cout << "Loc1: "; + for (i=0;i val1_start and val2_end > val2_start -inline double get_segment(double start, double end, double val1_start, +double get_segment(double start, double end, double val1_start, double val1_end, double val2_start, double val2_end) { const double length = end - start; @@ -71,7 +71,7 @@ inline double get_segment(double start, double end, double val1_start, } // cut down and compute segment -inline double get_segment_constrained(double seg1L1, double seg1L2, +double get_segment_constrained(double seg1L1, double seg1L2, double seg2L1, double seg2L2, double seg1V1, double seg1V2, double seg2V1, double seg2V2) @@ -127,7 +127,7 @@ double get_double_segment_constrained( // Dealing with segments which are to the left of the region covered by both // segs -inline double leftmost_segments(const NumericVector& loc1, +double leftmost_segments(const NumericVector& loc1, const NumericVector& loc2, const 
NumericVector& val1, const NumericVector& val2, diff --git a/src/test_emd_fast_smooth.cpp b/src/test_emd_fast_smooth.cpp new file mode 100644 index 00000000..508be859 --- /dev/null +++ b/src/test_emd_fast_smooth.cpp @@ -0,0 +1,202 @@ +/* + * This file uses the Catch unit testing library, alongside + * testthat's simple bindings, to test a C++ function. + * + * This file should begin with `test` and be in the `src/` folder. + * `LinkingTo: testthat` must also be within the DESCRIPTION file. + */ +// Enable C++11 +// [[Rcpp::plugins(cpp11)]] + +// All test files should include the +// header file. +#include "fastSmoothV2.h" +#include "emd_fast_no_smoothing.h" +#include +#include +#include + +// Helper function to test tolerance +bool within_toleranceV2(double actual, double expected, double tolerance) { + if(actual > expected) { + return ((actual - expected) <= tolerance); + } + else { + return ((expected - actual) <= tolerance); + } +} + +double simpleSlowArea(double startx,double endx,double starty1,double endy1,double starty2,double endy2) +{ + // Making this step size smaller + double step = (endx-startx)/10000000.0; + double curX; + double curY1; + double curY2; + double res = 0; + for (int i=0;i<10000000;i++) + { + curX = startx + i*step; + curY1 = starty1 +(endy1-starty1)*i/10000000.0; + curY2 = starty2 +(endy2-starty2)*i/10000000.0; + res += step*std::abs(curY1-curY2); + } + return res; +} + + +context("emd_fast_smoothing segment") { + test_that("Test Me") { + double start=0; + double end=1; + double val1_start=0; + double val1_end=1; + double val2_start=1; + double val2_end=0; + double tempVal1; + double tempVal2; + tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + within_toleranceV2(tempVal1,tempVal2,0.0001); + val1_start=2; + val1_end=1; + val2_start=2; + val2_end=0; + tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + 
tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + within_toleranceV2(tempVal1,tempVal2,0.0001); + val1_start=2; + val1_end=0; + val2_start=2; + val2_end=1; + tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + within_toleranceV2(tempVal1,tempVal2,0.0001); + val1_start=0; + val1_end=0; + val2_start=1; + val2_end=1; + tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + within_toleranceV2(tempVal1,tempVal2,0.0001); + val1_start=1; + val1_end=1; + val2_start=0; + val2_end=0; + tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + within_toleranceV2(tempVal1,tempVal2,0.0001); + // for (int startI = 0; startI<10;startI++) + // { + // start = (double)startI/10.0; + // for (int endI = startI+1; endI<10;endI++) + // { + // end = (double)endI/10.0; + // for (int val1_startI = 0; val1_startI<10;val1_startI++) + // { + // val1_start = (double)val1_startI/10.0; + // for (int val2_startI = 0; val2_startI<10;val2_startI++) + // { + // val2_start = (double)val2_startI/10.0; + // for (int val1_endI = 0; val1_endI<10;val1_endI++) + // { + // val1_end = (double)val1_endI/10.0; + // for (int val2_endI = 0; val2_endI<10;val2_endI++) + // { + // val2_end = (double)val2_endI/10.0; + // tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + // tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + // expect_true(tempVal1==tempVal2); + // } + // } + // } + // } + // } + // } + + +} +} + + +// Initialize a unit test context. This is similar to how you +// might begin an R test file with 'context()', expect the +// associated context should be wrapped in braced. 
+context("emd_fast_no_smoothing v2") { + + // Test Kahan summation helper function + // We also verify that the example data used to test the Kahan summation + // results in an unacceptably large error when using naiive summation. + test_that("mk2 Kahan summation works when naiive summation fails") { + // NOTE: If this test fails when the `add_element_kahan()` function has not + // been changed, there is a small chance that it may be that the package is + // being compiled with the `-ffast-math` compiler flag set (or an aggressive + // optimisation level that sets the fast math flag). If this flag is set, + // the Kahan compensation being optimised away. + + // Set up suitable test data + // ========================= + // NOTE: It is surprisingly easy to come up with test data where the naiive + // summation works fine, so the test to validate that the naiive sum results + // in an unnacceptably high error is important. I think this is for the + // following reasons. + // (A) If the start number is too large in comparison to the expected sum of + // the elements in the small number vector, then the floating point + // representation of the expected total will be the same as that of the + // start number. + // (B) If the start number is too small in comparison to the elements in the + // small number vector, we don't run into the floating point + // representation issue when we add each element to the running total. + // NOTE: Appveyor tests with both 32 and 64 bit builds of R (i386 and ix64 + // respectively). Therefore values have been chosen that all fall + // within a 32-bit floating point range (including the expected total) + // while still causing the naiive summation to fail with 64-bit R. + // + // ========================= + // 1. 
Define all test data components as powers of a common base to make + // it easy to accurately calculate the expected sum without adding any small + // numbers together + double start_num = 1125899906842624.0; // 2^50 = 1125899906842624 + double element_value = 0.03125; // 2^-5 = 0.03125 + double num_elements = 4096; // 2^12 = 4096 + double expected_total = 1125899906842752.0; // 2^50 + 2^7 + + std::vector input(num_elements, element_value); + + // Uncomment me if debugging test failure + Rcerr << std::fixed << "Num elements: " << num_elements << "; Element value: " + << element_value << "; Exp. total: " << expected_total + << "; Sizeof(double): " << sizeof(double) << "\n"; + + + // Define acceptable tolerance + double tolerance = element_value; + + // Do naiive and Kahan summation + double naiive_total = start_num; + double kahan_total = start_num; + double kahan_compensation = 0.0; + for(auto const& value: input) { + naiive_total += value; + add_element_kahan(kahan_total, value, kahan_compensation); + } + + double naiive_diff = (expected_total - naiive_total); + double kahan_diff = (expected_total - kahan_total); + + // Uncomment me if debugging test failure + Rcerr << std::fixed << "Expected: " << expected_total << "; " << "Naiive: " + << naiive_total + << "; Kahan: " << kahan_total << "; Naiive diff: " << naiive_diff + << "; Kahan diff: " << kahan_diff << "; Tolerance: " << tolerance + << "\n"; + + + // Check naiive summation has unacceptable error + expect_false(within_toleranceV2(naiive_total, expected_total, tolerance)); + // Check Kahan summation has acceptable error + expect_true(within_toleranceV2(kahan_total, expected_total, tolerance)); + + } + +} From a39892029127b01568a2bf5721e8376a8cf28986 Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Mon, 20 Apr 2020 09:42:34 +0100 Subject: [PATCH 39/84] Iterable class to interleave two sequences in the required way --- src/interleaved_iterator.h | 159 +++++++++++++++++++++++++++++++++++++ 1 file changed, 159 
insertions(+) create mode 100644 src/interleaved_iterator.h diff --git a/src/interleaved_iterator.h b/src/interleaved_iterator.h new file mode 100644 index 00000000..d81e205b --- /dev/null +++ b/src/interleaved_iterator.h @@ -0,0 +1,159 @@ +#ifndef INTERLEAVED_ITERATOR_H +#define INTERLEAVED_ITERATOR_H + +#include +#include +#include +#include +#include + +// Interleave two containers of ordered elements in a particular way. +// +// Supports iteration with Interleave::iterator +template +class Interleaved { + // short alias for the value type of the container + typedef typename Container::value_type CvalT; + typedef long int IndexT; + + Container& xs; + Container& ys; + +public: + Interleaved(Container& xs_, Container& ys_) : xs(xs_), ys(ys_) { } + + class iterator { + const Interleaved * const xys; + + IndexT Nx, Ny; + + // the minimum and maximum contained values; + CvalT minloc, maxloc; + + // the current iteration state + std::pair idx; + + public: + typedef std::pair value_type; + typedef void difference_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::input_iterator_tag iterator_category; + + explicit iterator(Interleaved *xys_) + : xys(xys_), Nx(xys_->xs.size()), Ny(xys_->ys.size()) + { + //// this slightly simpler version assumes both xs and ys are + //// nonempty: + + // if (xys->xs[0] <= xys->ys[0]) { + // minloc = xys->xs[0]; + // idx.first = 0; + // idx.second = -1; + // } + // else { + // minloc = xys->ys[0]; + // idx.first = -1; + // idx.second = 0; + // } + // maxloc = std::max(xys->xs.back(), xys->ys.back()); + + //// no such assumption: + + if (Nx != 0 && Ny != 0) { + maxloc = std::max(xys->xs.back(), xys->ys.back()); + } + else if (Nx != 0) { + maxloc = xys->xs.back(); + } + else if (Ny != 0) { + maxloc = xys->ys.back(); + } + else { + maxloc = 0; + minloc = 0; + idx.first = -1; + idx.second = -1; + } + + if (Nx != 0 && (Ny == 0 || Ny != 0 && xys->xs[0] <= xys->ys[0])) { + minloc = xys->xs[0]; + idx.first = 0; + 
idx.second = -1; + } + else if (Ny != 0) { + minloc = xys->ys[0]; + idx.first = -1; + idx.second = 0; + } + } + + CvalT get_x(IndexT i) const { + if (i < 0) return minloc; + else if (i >= Nx) return maxloc; + else return xys->xs[i]; + } + + CvalT get_y(IndexT i) const { + if (i < 0) return minloc; + else if (i >= Ny) return maxloc; + else return xys->ys[i]; + } + + std::pair get_loc(const std::pair& i) const { + CvalT x(get_x(i.first)); + CvalT y(get_y(i.second)); + + return std::make_pair(x, y); + } + + std::pair get_current_loc() const { + return get_loc(idx); + } + + iterator& advance_to_end() { + idx.first = Nx; + idx.second = Ny; + return *this; + } + + value_type operator*() const { + return idx; + } + + const value_type *operator->() const { + return &idx; + } + + iterator& operator++() { + std::pair loc(get_current_loc()); + while (idx.second < Ny && get_y(++idx.second) < loc.first) { } + + if (get_y(idx.second + 1) > get_x(idx.first + 1) || idx.second >= Ny) { + if (idx.first < Nx) idx.first++; + } else { + if (idx.second < Ny) idx.second++; + } + + } + + iterator& operator++(int) { + iterator res = *this; + operator++(); + return res; + } + + friend bool operator==(const iterator& lhs, const iterator& rhs) { + return lhs.idx == rhs.idx; + } + + friend bool operator!=(const iterator& lhs, const iterator& rhs) { + return !(lhs == rhs); + } + }; + + iterator begin() { return iterator(this); }; + iterator end() { return iterator(this).advance_to_end(); }; +}; + +#endif // INTERLEAVED_ITERATOR_H From c302c43627f24de8f7d3e906f1302b29e3dbaa21 Mon Sep 17 00:00:00 2001 From: ande Date: Tue, 21 Apr 2020 09:19:29 +0100 Subject: [PATCH 40/84] added fast smooth header --- src/fastSmoothV2.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/fastSmoothV2.h diff --git a/src/fastSmoothV2.h b/src/fastSmoothV2.h new file mode 100644 index 00000000..d822d846 --- /dev/null +++ b/src/fastSmoothV2.h @@ -0,0 +1,34 @@ +// Enable C++11 
+// [[Rcpp::plugins(cpp11)]] +#include +#include +#include +#include +#include + +using namespace Rcpp; + +inline double bowtie_area(double length, double val1_start, double val1_end, + double val2_start, double val2_end); +inline double get_segment(double start, double end, double val1_start, + double val1_end, double val2_start, double val2_end); +inline double get_segment_constrained(double seg1L1, double seg1L2, + double seg2L1, double seg2L2, + double seg1V1, double seg1V2, + double seg2V1, double seg2V2); + +double get_double_segment_constrained( + double seg1Loc1, double seg1Loc2, double seg1Loc3, + double seg1Val1, double seg1Val2, + double seg2Loc1, double seg2Loc2, double seg2Loc3, + double seg2Val1, double seg2Val2); + +inline double leftmost_segments(const NumericVector& loc1, + const NumericVector& loc2, + const NumericVector& val1, + const NumericVector& val2, + double binWidth1, + double maxLoc); + +double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, + NumericVector loc2, NumericVector val2, double binWidth2); From 89bba30534af8a291d8dac5ba8d006072bd2ca50 Mon Sep 17 00:00:00 2001 From: ande Date: Tue, 21 Apr 2020 10:55:32 +0100 Subject: [PATCH 41/84] updated tests --- src/fastSmoothV2.cpp | 5 +- src/fastSmoothV2.h | 6 +- src/test_emd_fast_smooth.cpp | 217 +++++++++++------------------------ 3 files changed, 72 insertions(+), 156 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index e61f84d2..1b544a2d 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -79,6 +79,7 @@ double get_segment_constrained(double seg1L1, double seg1L2, //We have a valid range double valStart1, valEnd1, valStart2, valEnd2; double start,end; + double result; start = std::max(seg1L1,seg2L1); end = std::min(seg1L2,seg2L2); if (start < end) { @@ -86,8 +87,8 @@ double get_segment_constrained(double seg1L1, double seg1L2, valEnd1 = seg1V1 + (seg1V2 - seg1V1)*(end - seg1L1)/(seg1L2 - seg1L1); valStart2 = seg2V1 + (seg2V2 - 
seg2V1)*(start - seg2L1)/(seg2L2 - seg2L1); valEnd2 = seg2V1 + (seg2V2 - seg2V1)*(end - seg2L1)/(seg2L2 - seg2L1); - - return get_segment(start, end, valStart1, valEnd1, valStart2, valEnd2); + result = get_segment(start, end, valStart1, valEnd1, valStart2, valEnd2); + return result; } else { return 0; diff --git a/src/fastSmoothV2.h b/src/fastSmoothV2.h index d822d846..ac3b1b11 100644 --- a/src/fastSmoothV2.h +++ b/src/fastSmoothV2.h @@ -8,11 +8,11 @@ using namespace Rcpp; -inline double bowtie_area(double length, double val1_start, double val1_end, +double bowtie_area(double length, double val1_start, double val1_end, double val2_start, double val2_end); -inline double get_segment(double start, double end, double val1_start, +double get_segment(double start, double end, double val1_start, double val1_end, double val2_start, double val2_end); -inline double get_segment_constrained(double seg1L1, double seg1L2, +double get_segment_constrained(double seg1L1, double seg1L2, double seg2L1, double seg2L2, double seg1V1, double seg1V2, double seg2V1, double seg2V2); diff --git a/src/test_emd_fast_smooth.cpp b/src/test_emd_fast_smooth.cpp index 508be859..798e0c84 100644 --- a/src/test_emd_fast_smooth.cpp +++ b/src/test_emd_fast_smooth.cpp @@ -29,174 +29,89 @@ bool within_toleranceV2(double actual, double expected, double tolerance) { double simpleSlowArea(double startx,double endx,double starty1,double endy1,double starty2,double endy2) { // Making this step size smaller - double step = (endx-startx)/10000000.0; + double step = (endx-startx)/100000000.0; double curX; double curY1; double curY2; double res = 0; - for (int i=0;i<10000000;i++) + for (int i=0;i<100000000;i++) { curX = startx + i*step; - curY1 = starty1 +(endy1-starty1)*i/10000000.0; - curY2 = starty2 +(endy2-starty2)*i/10000000.0; + curY1 = starty1 +(endy1-starty1)*i/100000000.0; + curY2 = starty2 +(endy2-starty2)*i/100000000.0; res += step*std::abs(curY1-curY2); } return res; } - -context("emd_fast_smoothing 
segment") { - test_that("Test Me") { - double start=0; - double end=1; - double val1_start=0; - double val1_end=1; - double val2_start=1; - double val2_end=0; +void runSegmentConstraintTest(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) +{ double tempVal1; double tempVal2; - tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + tempVal1 = get_segment_constrained(start,end,start,end,val1_start,val1_end,val2_start,val2_end); tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - within_toleranceV2(tempVal1,tempVal2,0.0001); - val1_start=2; - val1_end=1; - val2_start=2; - val2_end=0; - tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); - tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - within_toleranceV2(tempVal1,tempVal2,0.0001); - val1_start=2; - val1_end=0; - val2_start=2; - val2_end=1; - tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); - tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - within_toleranceV2(tempVal1,tempVal2,0.0001); - val1_start=0; - val1_end=0; - val2_start=1; - val2_end=1; - tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); - tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - within_toleranceV2(tempVal1,tempVal2,0.0001); - val1_start=1; - val1_end=1; - val2_start=0; - val2_end=0; + std::cout << "\n"; + std::cout << "segment constrained " << tempVal1 << " simpleResult " << tempVal2 << "\n"; + expect_true(within_toleranceV2(tempVal1,tempVal2,0.0001)); +} + + +void runSegmentTest(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) +{ + double tempVal1; + double tempVal2; tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - 
within_toleranceV2(tempVal1,tempVal2,0.0001); - // for (int startI = 0; startI<10;startI++) - // { - // start = (double)startI/10.0; - // for (int endI = startI+1; endI<10;endI++) - // { - // end = (double)endI/10.0; - // for (int val1_startI = 0; val1_startI<10;val1_startI++) - // { - // val1_start = (double)val1_startI/10.0; - // for (int val2_startI = 0; val2_startI<10;val2_startI++) - // { - // val2_start = (double)val2_startI/10.0; - // for (int val1_endI = 0; val1_endI<10;val1_endI++) - // { - // val1_end = (double)val1_endI/10.0; - // for (int val2_endI = 0; val2_endI<10;val2_endI++) - // { - // val2_end = (double)val2_endI/10.0; - // tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); - // tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - // expect_true(tempVal1==tempVal2); - // } - // } - // } - // } - // } - // } - - -} + std::cout << "\n"; + std::cout << "segment test " << tempVal1 << " simpleResult " << tempVal2 << "\n"; + expect_true(within_toleranceV2(tempVal1,tempVal2,0.0001)); } -// Initialize a unit test context. This is similar to how you -// might begin an R test file with 'context()', expect the -// associated context should be wrapped in braced. -context("emd_fast_no_smoothing v2") { - - // Test Kahan summation helper function - // We also verify that the example data used to test the Kahan summation - // results in an unacceptably large error when using naiive summation. - test_that("mk2 Kahan summation works when naiive summation fails") { - // NOTE: If this test fails when the `add_element_kahan()` function has not - // been changed, there is a small chance that it may be that the package is - // being compiled with the `-ffast-math` compiler flag set (or an aggressive - // optimisation level that sets the fast math flag). If this flag is set, - // the Kahan compensation being optimised away. 
- - // Set up suitable test data - // ========================= - // NOTE: It is surprisingly easy to come up with test data where the naiive - // summation works fine, so the test to validate that the naiive sum results - // in an unnacceptably high error is important. I think this is for the - // following reasons. - // (A) If the start number is too large in comparison to the expected sum of - // the elements in the small number vector, then the floating point - // representation of the expected total will be the same as that of the - // start number. - // (B) If the start number is too small in comparison to the elements in the - // small number vector, we don't run into the floating point - // representation issue when we add each element to the running total. - // NOTE: Appveyor tests with both 32 and 64 bit builds of R (i386 and ix64 - // respectively). Therefore values have been chosen that all fall - // within a 32-bit floating point range (including the expected total) - // while still causing the naiive summation to fail with 64-bit R. - // - // ========================= - // 1. Define all test data components as powers of a common base to make - // it easy to accurately calculate the expected sum without adding any small - // numbers together - double start_num = 1125899906842624.0; // 2^50 = 1125899906842624 - double element_value = 0.03125; // 2^-5 = 0.03125 - double num_elements = 4096; // 2^12 = 4096 - double expected_total = 1125899906842752.0; // 2^50 + 2^7 - - std::vector input(num_elements, element_value); - - // Uncomment me if debugging test failure - Rcerr << std::fixed << "Num elements: " << num_elements << "; Element value: " - << element_value << "; Exp. 
total: " << expected_total - << "; Sizeof(double): " << sizeof(double) << "\n"; - - - // Define acceptable tolerance - double tolerance = element_value; +context("emd_fast_smoothing segment constrain simple") { + test_that("emd_fast_smoothing segment constrain simple") { + // Two upward linear segments + runSegmentConstraintTest(0.0,1.0,0.0,1.0,0.0,1.0); + // One upward one down linear segments + runSegmentConstraintTest(0.0,1.0,0.0,1.0,1.0,0.0); + runSegmentConstraintTest(0.0,1.0,1.0,0.0,0.0,1.0); + // Two down linear segments + runSegmentConstraintTest(0.0,1.0,1.0,0.0,1.0,0.0); + // One flat one up segments + runSegmentConstraintTest(0.0,1.0,0.0,0.0,0.0,1.0); + runSegmentConstraintTest(0.0,1.0,0.0,1.0,0.0,0.0); + // One flat one down segments + runSegmentConstraintTest(0.0,1.0,1.0,0.0,0.0,0.0); + runSegmentConstraintTest(0.0,1.0,0.0,0.0,1.0,0.0); + // Different gradients segments + runSegmentConstraintTest(0.0,1.0,0.0,3.0,0.0,0.0); + runSegmentConstraintTest(0.0,1.0,0.0,0.0,0.0,3.0); + // Different gradients segments + runSegmentConstraintTest(0.0,1.0,2.0,4.0,1.0,2.0); + runSegmentConstraintTest(0.0,1.0,1.0,2.0,2.0,3.0); +}} + +context("emd_fast_smoothing segment full") { + test_that("emd_fast_smoothing segment full") { + // Two upward linear segments + runSegmentTest(0.0,1.0,0.0,1.0,0.0,1.0); + // One upward one down linear segments + runSegmentTest(0.0,1.0,0.0,1.0,1.0,0.0); + runSegmentTest(0.0,1.0,1.0,0.0,0.0,1.0); + // Two down linear segments + runSegmentTest(0.0,1.0,1.0,0.0,1.0,0.0); + // One flat one up segments + runSegmentTest(0.0,1.0,0.0,0.0,0.0,1.0); + runSegmentTest(0.0,1.0,0.0,1.0,0.0,0.0); + // One flat one down segments + runSegmentTest(0.0,1.0,1.0,0.0,0.0,0.0); + runSegmentTest(0.0,1.0,0.0,0.0,1.0,0.0); + // Different gradients segments + runSegmentTest(0.0,1.0,0.0,3.0,0.0,0.0); + runSegmentTest(0.0,1.0,0.0,0.0,0.0,3.0); + // Different gradients segments + runSegmentTest(0.0,1.0,2.0,4.0,1.0,2.0); + runSegmentTest(0.0,1.0,1.0,2.0,2.0,3.0); +}} + - 
// Do naiive and Kahan summation - double naiive_total = start_num; - double kahan_total = start_num; - double kahan_compensation = 0.0; - for(auto const& value: input) { - naiive_total += value; - add_element_kahan(kahan_total, value, kahan_compensation); - } - - double naiive_diff = (expected_total - naiive_total); - double kahan_diff = (expected_total - kahan_total); - - // Uncomment me if debugging test failure - Rcerr << std::fixed << "Expected: " << expected_total << "; " << "Naiive: " - << naiive_total - << "; Kahan: " << kahan_total << "; Naiive diff: " << naiive_diff - << "; Kahan diff: " << kahan_diff << "; Tolerance: " << tolerance - << "\n"; - - - // Check naiive summation has unacceptable error - expect_false(within_toleranceV2(naiive_total, expected_total, tolerance)); - // Check Kahan summation has acceptable error - expect_true(within_toleranceV2(kahan_total, expected_total, tolerance)); - - } - -} From 54363104d9efcacd444ce51b00148c4c83eaea45 Mon Sep 17 00:00:00 2001 From: ande Date: Tue, 21 Apr 2020 16:27:19 +0100 Subject: [PATCH 42/84] remove browser --- R/emd.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/emd.R b/R/emd.R index e9eee6f3..95d4fcc4 100644 --- a/R/emd.R +++ b/R/emd.R @@ -167,7 +167,6 @@ min_emd_optimise <- function(dhist1, dhist2) { soln <- stats::optimise(emd_offset, lower = min_offset, upper = max_offset, tol = .Machine$double.eps*1000) - browser() # Return mnimum EMD and associated offset min_emd <- soln$objective min_offset <- soln$minimum From 5c4dd42d93033567ab61ff6487a909589870ed3b Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Wed, 22 Apr 2020 14:23:14 +0100 Subject: [PATCH 43/84] Iterate over overlapping intervals --- src/interleaved_iterator.h | 197 +++++++++++++++++++++-------------- src/test_emd_fast_smooth.cpp | 152 +++++++++++++++++++++------ 2 files changed, 243 insertions(+), 106 deletions(-) diff --git a/src/interleaved_iterator.h b/src/interleaved_iterator.h index d81e205b..7451a9fb 100644 --- 
a/src/interleaved_iterator.h +++ b/src/interleaved_iterator.h @@ -1,29 +1,48 @@ -#ifndef INTERLEAVED_ITERATOR_H -#define INTERLEAVED_ITERATOR_H +#ifndef OVERLAPPING_SEGMENTS_H +#define OVERLAPPING_SEGMENTS_H #include +#include #include #include #include #include +#include +#include -// Interleave two containers of ordered elements in a particular way. +// Class used to support iteration (with OverlappingSegments::iterator) +// over pairs of overlapping segments within two sequences. // -// Supports iteration with Interleave::iterator template -class Interleaved { +class OverlappingSegments { // short alias for the value type of the container - typedef typename Container::value_type CvalT; + typedef typename std::remove_reference::type CvalT; typedef long int IndexT; - Container& xs; - Container& ys; + // references the two sequences + const Container& loc1; + const Container& loc2; public: - Interleaved(Container& xs_, Container& ys_) : xs(xs_), ys(ys_) { } - + OverlappingSegments(Container& loc1_, Container& loc2_) + : loc1(loc1_), loc2(loc2_) { + // check the requirement that loc1 and loc2 are nonempty and + // (strictly) sorted + + if (loc1.size() == 0 || loc2.size() == 0) + throw std::invalid_argument("Input vectors must be nonempty"); + + for (int i = 0; i < loc1.size() - 1; i++) + if (loc1[i] > loc1[i + 1]) + throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); + + for (int i = 0; i < loc2.size() - 1; i++) + if (loc2[i] > loc2[i + 1]) + throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); + } + class iterator { - const Interleaved * const xys; + const OverlappingSegments * const segs; IndexT Nx, Ny; @@ -39,102 +58,126 @@ class Interleaved { typedef value_type* pointer; typedef value_type& reference; typedef std::input_iterator_tag iterator_category; - - explicit iterator(Interleaved *xys_) - : xys(xys_), Nx(xys_->xs.size()), Ny(xys_->ys.size()) + + // Iterate over pairs of indices (i,j) into 
the sequences loc1 and + // loc2, where the intervals [loc1[i], loc1[i+1]] and [loc2[j], + // loc2[j+1]] overlap. + // + // These indices are returned from the iterator as + // std::pair. + // + // A sequence has an implicit segment from minloc (with index -1) + // to its zeroth element. The elements loc1[0] and loc2[0] are + // compared to determine whether, for either sequence, this + // initial implicit segment overlaps the zeroth segment of the + // other one. If both sequences start with the same value, the + // iteration starts at (0,0). + // + explicit iterator(OverlappingSegments *segs_) + : segs(segs_), Nx(segs_->loc1.size()), Ny(segs_->loc2.size()) { - //// this slightly simpler version assumes both xs and ys are - //// nonempty: - - // if (xys->xs[0] <= xys->ys[0]) { - // minloc = xys->xs[0]; - // idx.first = 0; - // idx.second = -1; - // } - // else { - // minloc = xys->ys[0]; - // idx.first = -1; - // idx.second = 0; - // } - // maxloc = std::max(xys->xs.back(), xys->ys.back()); - - //// no such assumption: + // The initial state of the iterator - if (Nx != 0 && Ny != 0) { - maxloc = std::max(xys->xs.back(), xys->ys.back()); - } - else if (Nx != 0) { - maxloc = xys->xs.back(); - } - else if (Ny != 0) { - maxloc = xys->ys.back(); - } - else { - maxloc = 0; - minloc = 0; - idx.first = -1; + if (segs->loc1[0] < segs->loc2[0]) { + minloc = segs->loc1[0]; + idx.first = 0; idx.second = -1; } - - if (Nx != 0 && (Ny == 0 || Ny != 0 && xys->xs[0] <= xys->ys[0])) { - minloc = xys->xs[0]; + else if (segs->loc1[0] == segs->loc2[0]) { + minloc = segs->loc1[0]; // == segs->loc2[0] idx.first = 0; - idx.second = -1; + idx.second = 0; } - else if (Ny != 0) { - minloc = xys->ys[0]; + else { + minloc = segs->loc2[0]; idx.first = -1; idx.second = 0; } + + // maxloc is used to signal the end of the current segment: + // since we consider only left-hand endpoints, the only + // requirement is that it compares greater than any real loc in + // either sequence - in reality it 
will be the largest loc plus + // the corresponding binwidth + maxloc = std::numeric_limits::infinity(); } - CvalT get_x(IndexT i) const { + CvalT get_loc1(IndexT i) const { if (i < 0) return minloc; else if (i >= Nx) return maxloc; - else return xys->xs[i]; + else return segs->loc1[i]; } - CvalT get_y(IndexT i) const { + CvalT get_loc2(IndexT i) const { if (i < 0) return minloc; else if (i >= Ny) return maxloc; - else return xys->ys[i]; + else return segs->loc2[i]; } - - std::pair get_loc(const std::pair& i) const { - CvalT x(get_x(i.first)); - CvalT y(get_y(i.second)); - return std::make_pair(x, y); + // Does interval i (from the first collection of segments) overlap + // interval j (from the second)? + bool intervals_overlap(IndexT i, IndexT j) const { + return (get_loc1(i) < get_loc2(j + 1) && get_loc2(j) < get_loc1(i + 1)); } - std::pair get_current_loc() const { - return get_loc(idx); - } + bool at_end() const { return idx.first == Nx && idx.second == Ny - 1; } iterator& advance_to_end() { idx.first = Nx; - idx.second = Ny; + idx.second = Ny - 1; return *this; } + + value_type operator*() const { return idx; } - value_type operator*() const { - return idx; - } - - const value_type *operator->() const { - return &idx; - } + const value_type *operator->() const { return &idx; } iterator& operator++() { - std::pair loc(get_current_loc()); - while (idx.second < Ny && get_y(++idx.second) < loc.first) { } - if (get_y(idx.second + 1) > get_x(idx.first + 1) || idx.second >= Ny) { - if (idx.first < Nx) idx.first++; - } else { - if (idx.second < Ny) idx.second++; +#if !NDEBUG + // Verify precondition + if (!intervals_overlap(idx.first, idx.second)) { + throw std::logic_error("Iterator precondition not satisfied: " + "current intervals do not overlap"); } - +#endif + + // Advance the second segment if it would still overlap the first + // + // The condition below is equivalent to + // idx.second < Ny - 1 && intervals_overlap(idx.first, idx.second + 1) + // + // For the 
right-hand condition, we know that (by precondition) + // get_loc1(idx.first) < get_loc2(idx.second + 1) + // + // and therefore that + // get_loc1(idx.first) < get_loc2(idx.second + 2), + // + // so this inequality is all that remains to be checked. + // + if (idx.second < Ny - 1 + && get_loc2(idx.second + 1) < get_loc1(idx.first + 1)) { + idx.second++; + } + // Could not advance the second segment above: advance the first instead, + // and the second as well if they share an endpoint + else { + if (idx.second < Ny - 1 + && get_loc1(idx.first + 1) == get_loc2(idx.second + 1)) { + idx.second++; + } + idx.first++; + } + +#if !NDEBUG + // Verify postcondition + if (!(at_end() || intervals_overlap(idx.first, idx.second))) { + throw std::logic_error("Iterator postcondition not satisfied: " + "current intervals do not overlap (not at end)"); + } +#endif + + return *this; } iterator& operator++(int) { @@ -156,4 +199,4 @@ class Interleaved { iterator end() { return iterator(this).advance_to_end(); }; }; -#endif // INTERLEAVED_ITERATOR_H +#endif // OVERLAPPING_SEGMENTS_H diff --git a/src/test_emd_fast_smooth.cpp b/src/test_emd_fast_smooth.cpp index 798e0c84..ac0a65b7 100644 --- a/src/test_emd_fast_smooth.cpp +++ b/src/test_emd_fast_smooth.cpp @@ -12,6 +12,8 @@ // header file. 
#include "fastSmoothV2.h" #include "emd_fast_no_smoothing.h" +#include "interleaved_iterator.h" + #include #include #include @@ -28,46 +30,45 @@ bool within_toleranceV2(double actual, double expected, double tolerance) { double simpleSlowArea(double startx,double endx,double starty1,double endy1,double starty2,double endy2) { - // Making this step size smaller - double step = (endx-startx)/100000000.0; - double curX; - double curY1; - double curY2; - double res = 0; - for (int i=0;i<100000000;i++) - { - curX = startx + i*step; - curY1 = starty1 +(endy1-starty1)*i/100000000.0; - curY2 = starty2 +(endy2-starty2)*i/100000000.0; - res += step*std::abs(curY1-curY2); - } - return res; + // Making this step size smaller + double step = (endx-startx)/100000000.0; + double curX; + double curY1; + double curY2; + double res = 0; + for (int i=0;i<100000000;i++) + { + curX = startx + i*step; + curY1 = starty1 +(endy1-starty1)*i/100000000.0; + curY2 = starty2 +(endy2-starty2)*i/100000000.0; + res += step*std::abs(curY1-curY2); + } + return res; } void runSegmentConstraintTest(double start,double end,double val1_start,double val1_end,double val2_start,double val2_end) { - double tempVal1; - double tempVal2; - tempVal1 = get_segment_constrained(start,end,start,end,val1_start,val1_end,val2_start,val2_end); - tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - std::cout << "\n"; - std::cout << "segment constrained " << tempVal1 << " simpleResult " << tempVal2 << "\n"; - expect_true(within_toleranceV2(tempVal1,tempVal2,0.0001)); + double tempVal1; + double tempVal2; + tempVal1 = get_segment_constrained(start,end,start,end,val1_start,val1_end,val2_start,val2_end); + tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + std::cout << "\n"; + std::cout << "segment constrained " << tempVal1 << " simpleResult " << tempVal2 << "\n"; + expect_true(within_toleranceV2(tempVal1,tempVal2,0.0001)); } void runSegmentTest(double start,double 
end,double val1_start,double val1_end,double val2_start,double val2_end) { - double tempVal1; - double tempVal2; - tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); - tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); - std::cout << "\n"; - std::cout << "segment test " << tempVal1 << " simpleResult " << tempVal2 << "\n"; - expect_true(within_toleranceV2(tempVal1,tempVal2,0.0001)); + double tempVal1; + double tempVal2; + tempVal1 = get_segment(start,end, val1_start,val1_end,val2_start,val2_end); + tempVal2 = simpleSlowArea(start,end, val1_start,val1_end,val2_start,val2_end); + std::cout << "\n"; + std::cout << "segment test " << tempVal1 << " simpleResult " << tempVal2 << "\n"; + expect_true(within_toleranceV2(tempVal1,tempVal2,0.0001)); } - context("emd_fast_smoothing segment constrain simple") { test_that("emd_fast_smoothing segment constrain simple") { // Two upward linear segments @@ -114,4 +115,97 @@ context("emd_fast_smoothing segment full") { runSegmentTest(0.0,1.0,1.0,2.0,2.0,3.0); }} +template +void runIntervalOverlapTest(Container1T& actual, Container2T& expected) +{ + std::cout << "Left endpoints of overlapping intervals:" << std::endl; + for (std::pair p : actual) + std::cout << p.first << ", " << p.second << std::endl; + + std::cout << "Expected:" << std::endl; + for (std::pair p : expected) + std::cout << p.first << ", " << p.second << std::endl; + + bool result = std::equal(actual.begin(), actual.end(), expected.begin()); + + std::cout << "Same? 
" << std::boolalpha << result << std::endl; + std::cout << "~~~~~~~~~~\n"; + + expect_true(result); +} + +context("emd_fast_smooth overlapping interval iterator") { + test_that("emd_fast_smooth overlapping interval iterator") { + { + std::vector xs {1.0, 3.0, 5.0}; + std::vector ys {2.0, 4.0, 6.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {0, -1}, {0, 0}, {1, 0}, {1, 1}, {2, 1}, {2, 2}}; + runIntervalOverlapTest(actual, expected); + } + { + std::vector xs {1.0, 3.0}; + std::vector ys {4.0, 6.0, 8.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {0, -1}, {1, -1}, {1, 0}, {1, 1}, {1, 2}}; + runIntervalOverlapTest(actual, expected); + } + + { + std::vector xs {5.0, 5.5}; + std::vector ys {4.0, 6.0, 8.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {-1, 0}, {0, 0}, {1, 0}, {1, 1}, {1, 2}}; + runIntervalOverlapTest(actual, expected); + } + + { + std::vector xs {1.0, 2.0}; + std::vector ys {1.0, 3.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {0, 0}, {1, 0}, {1, 1}}; + runIntervalOverlapTest(actual, expected); + } + + { + std::vector xs {1.0, 3.0}; + std::vector ys {1.0, 2.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {0, 0}, {0, 1}, {1, 1}}; + runIntervalOverlapTest(actual, expected); + } + + { + std::vector xs {1.0, 2.0}; + std::vector ys {1.5, 2.0, 3.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {0, -1}, {0, 0}, {1, 1}, {1, 2}}; + runIntervalOverlapTest(actual, expected); + } + + { + std::vector xs {1.0, 2.0}; + std::vector ys {1.0, 2.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {0, 0}, {1, 1}}; + runIntervalOverlapTest(actual, expected); + } + + { + std::vector xs {1.0, 2.0}; + std::vector ys {1.5, 2.0}; + OverlappingSegments > actual(xs, ys); + std::vector > expected { + {0, -1}, {0, 0}, {1, 1}}; + runIntervalOverlapTest(actual, expected); + } + } +} From 
77b9338be707b1e3e436384825c06b92a143fc26 Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Wed, 22 Apr 2020 21:04:55 +0100 Subject: [PATCH 44/84] Move overlapping segment iterator into fastSmoothV2.h --- src/fastSmoothV2.h | 201 ++++++++++++++++++++++++++++++++++++ src/interleaved_iterator.h | 202 ------------------------------------- 2 files changed, 201 insertions(+), 202 deletions(-) delete mode 100644 src/interleaved_iterator.h diff --git a/src/fastSmoothV2.h b/src/fastSmoothV2.h index ac3b1b11..130f8b52 100644 --- a/src/fastSmoothV2.h +++ b/src/fastSmoothV2.h @@ -1,6 +1,18 @@ // Enable C++11 // [[Rcpp::plugins(cpp11)]] + +#ifndef FASTSMOOTHV2_H +#define FASTSMOOTHV2_H + #include +#include +#include +#include +#include +// #include +// #include +#include +#include #include #include #include @@ -8,6 +20,193 @@ using namespace Rcpp; +class OverlappingSegments { + // references the two sequences + const NumericVector& loc1; + const NumericVector& loc2; + const double binWidth1, binWidth2; + +public: + OverlappingSegments(NumericVector& loc1_, NumericVector& loc2_, + double binWidth1_ = 1.0, double binWidth2_ = 1.0) + : loc1(loc1_), loc2(loc2_), binWidth1(binWidth1_), binWidth2(binWidth2_) + { + // check the requirement that loc1 and loc2 are nonempty and + // (strictly) sorted + + if (loc1.size() == 0 || loc2.size() == 0) + throw std::invalid_argument("Input vectors must be nonempty"); + + for (int i = 0; i < loc1.size() - 1; i++) + if (loc1[i] > loc1[i + 1]) + throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); + + for (int i = 0; i < loc2.size() - 1; i++) + if (loc2[i] > loc2[i + 1]) + throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); + } + + class iterator { + const OverlappingSegments * const segs; + + long Nx, Ny; + + // the minimum and maximum contained values; + double minloc, maxloc; + + // the current iteration state + std::pair idx; + + public: + typedef std::pair value_type; + 
typedef void difference_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::input_iterator_tag iterator_category; + + // Iterate over pairs of indices (i,j) into the sequences loc1 and + // loc2, where the intervals [loc1[i], loc1[i+1]] and [loc2[j], + // loc2[j+1]] overlap. + // + // These indices are returned from the iterator as + // std::pair. + // + // A sequence has an implicit segment from minloc (with index -1) + // to its zeroth element. The elements loc1[0] and loc2[0] are + // compared to determine whether, for either sequence, this + // initial implicit segment overlaps the zeroth segment of the + // other one. If both sequences start with the same value, the + // iteration starts at (0,0). + // + explicit iterator(OverlappingSegments *segs_) + : segs(segs_), Nx(segs_->loc1.size()), Ny(segs_->loc2.size()) + { + // The initial state of the iterator + + if (segs->loc1[0] < segs->loc2[0]) { + minloc = segs->loc1[0]; + idx.first = 0; + idx.second = -1; + } + else if (segs->loc1[0] == segs->loc2[0]) { + minloc = segs->loc1[0]; // == segs->loc2[0] + idx.first = 0; + idx.second = 0; + } + else { + minloc = segs->loc2[0]; + idx.first = -1; + idx.second = 0; + } + + // maxloc is used to signal the end of the current segment: + // since we consider only left-hand endpoints, the only + // requirement is that it compares greater than any real loc in + // either sequence - in reality it will be the largest loc plus + // the corresponding bin width + // maxloc = std::numeric_limits::infinity(); + maxloc = std::max(segs->loc1[segs->loc1.size() - 1] + segs->binWidth1, + segs->loc2[segs->loc2.size() - 1] + segs->binWidth2); + } + + double get_loc1(long i) const { + if (i < 0) return minloc; + else if (i >= Nx) return maxloc; + else return segs->loc1[i]; + } + + double get_loc2(long i) const { + if (i < 0) return minloc; + else if (i >= Ny) return maxloc; + else return segs->loc2[i]; + } + + // Does interval i (from the first collection 
of segments) overlap + // interval j (from the second)? + bool intervals_overlap(long i, long j) const { + return (get_loc1(i) < get_loc2(j + 1) && get_loc2(j) < get_loc1(i + 1)); + } + + bool at_end() const { return idx.first == Nx && idx.second == Ny - 1; } + + iterator& advance_to_end() { + idx.first = Nx; + idx.second = Ny - 1; + return *this; + } + + value_type operator*() const { return idx; } + + const value_type *operator->() const { return &idx; } + + iterator& operator++() { + +#if !NDEBUG + // Verify precondition + if (!intervals_overlap(idx.first, idx.second)) { + throw std::logic_error("Iterator precondition not satisfied: " + "current intervals do not overlap"); + } +#endif + + // Advance the second segment if it would still overlap the first + // + // The condition below is equivalent to + // idx.second < Ny - 1 && intervals_overlap(idx.first, idx.second + 1) + // + // For the right-hand condition, we know that (by precondition) + // get_loc1(idx.first) < get_loc2(idx.second + 1) + // + // and therefore that + // get_loc1(idx.first) < get_loc2(idx.second + 2), + // + // so this inequality is all that remains to be checked. 
+ // + if (idx.second < Ny - 1 + && get_loc2(idx.second + 1) < get_loc1(idx.first + 1)) { + idx.second++; + } + // Could not advance the second segment above: advance the first instead, + // and the second as well if they share an endpoint + else { + if (idx.second < Ny - 1 + && get_loc1(idx.first + 1) == get_loc2(idx.second + 1)) { + idx.second++; + } + idx.first++; + } + +#if !NDEBUG + // Verify postcondition + if (!(at_end() || intervals_overlap(idx.first, idx.second))) { + throw std::logic_error("Iterator postcondition not satisfied: " + "current intervals do not overlap (not at end)"); + } +#endif + + return *this; + } + + iterator operator++(int) { + iterator res = *this; + operator++(); + return res; + } + + friend bool operator==(const iterator& lhs, const iterator& rhs) { + return lhs.idx == rhs.idx; + } + + friend bool operator!=(const iterator& lhs, const iterator& rhs) { + return !(lhs == rhs); + } + }; + + iterator begin() { return iterator(this); }; + iterator end() { return iterator(this).advance_to_end(); }; +}; + + double bowtie_area(double length, double val1_start, double val1_end, double val2_start, double val2_end); double get_segment(double start, double end, double val1_start, @@ -32,3 +231,5 @@ inline double leftmost_segments(const NumericVector& loc1, double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2); + +#endif // FASTSMOOTHV2_H diff --git a/src/interleaved_iterator.h b/src/interleaved_iterator.h deleted file mode 100644 index 7451a9fb..00000000 --- a/src/interleaved_iterator.h +++ /dev/null @@ -1,202 +0,0 @@ -#ifndef OVERLAPPING_SEGMENTS_H -#define OVERLAPPING_SEGMENTS_H - -#include -#include -#include -#include -#include -#include -#include -#include - -// Class used to support iteration (with OverlappingSegments::iterator) -// over pairs of overlapping segments within two sequences. 
-// -template -class OverlappingSegments { - // short alias for the value type of the container - typedef typename std::remove_reference::type CvalT; - typedef long int IndexT; - - // references the two sequences - const Container& loc1; - const Container& loc2; - -public: - OverlappingSegments(Container& loc1_, Container& loc2_) - : loc1(loc1_), loc2(loc2_) { - // check the requirement that loc1 and loc2 are nonempty and - // (strictly) sorted - - if (loc1.size() == 0 || loc2.size() == 0) - throw std::invalid_argument("Input vectors must be nonempty"); - - for (int i = 0; i < loc1.size() - 1; i++) - if (loc1[i] > loc1[i + 1]) - throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); - - for (int i = 0; i < loc2.size() - 1; i++) - if (loc2[i] > loc2[i + 1]) - throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); - } - - class iterator { - const OverlappingSegments * const segs; - - IndexT Nx, Ny; - - // the minimum and maximum contained values; - CvalT minloc, maxloc; - - // the current iteration state - std::pair idx; - - public: - typedef std::pair value_type; - typedef void difference_type; - typedef value_type* pointer; - typedef value_type& reference; - typedef std::input_iterator_tag iterator_category; - - // Iterate over pairs of indices (i,j) into the sequences loc1 and - // loc2, where the intervals [loc1[i], loc1[i+1]] and [loc2[j], - // loc2[j+1]] overlap. - // - // These indices are returned from the iterator as - // std::pair. - // - // A sequence has an implicit segment from minloc (with index -1) - // to its zeroth element. The elements loc1[0] and loc2[0] are - // compared to determine whether, for either sequence, this - // initial implicit segment overlaps the zeroth segment of the - // other one. If both sequences start with the same value, the - // iteration starts at (0,0). 
- // - explicit iterator(OverlappingSegments *segs_) - : segs(segs_), Nx(segs_->loc1.size()), Ny(segs_->loc2.size()) - { - // The initial state of the iterator - - if (segs->loc1[0] < segs->loc2[0]) { - minloc = segs->loc1[0]; - idx.first = 0; - idx.second = -1; - } - else if (segs->loc1[0] == segs->loc2[0]) { - minloc = segs->loc1[0]; // == segs->loc2[0] - idx.first = 0; - idx.second = 0; - } - else { - minloc = segs->loc2[0]; - idx.first = -1; - idx.second = 0; - } - - // maxloc is used to signal the end of the current segment: - // since we consider only left-hand endpoints, the only - // requirement is that it compares greater than any real loc in - // either sequence - in reality it will be the largest loc plus - // the corresponding binwidth - maxloc = std::numeric_limits::infinity(); - } - - CvalT get_loc1(IndexT i) const { - if (i < 0) return minloc; - else if (i >= Nx) return maxloc; - else return segs->loc1[i]; - } - - CvalT get_loc2(IndexT i) const { - if (i < 0) return minloc; - else if (i >= Ny) return maxloc; - else return segs->loc2[i]; - } - - // Does interval i (from the first collection of segments) overlap - // interval j (from the second)? 
- bool intervals_overlap(IndexT i, IndexT j) const { - return (get_loc1(i) < get_loc2(j + 1) && get_loc2(j) < get_loc1(i + 1)); - } - - bool at_end() const { return idx.first == Nx && idx.second == Ny - 1; } - - iterator& advance_to_end() { - idx.first = Nx; - idx.second = Ny - 1; - return *this; - } - - value_type operator*() const { return idx; } - - const value_type *operator->() const { return &idx; } - - iterator& operator++() { - -#if !NDEBUG - // Verify precondition - if (!intervals_overlap(idx.first, idx.second)) { - throw std::logic_error("Iterator precondition not satisfied: " - "current intervals do not overlap"); - } -#endif - - // Advance the second segment if it would still overlap the first - // - // The condition below is equivalent to - // idx.second < Ny - 1 && intervals_overlap(idx.first, idx.second + 1) - // - // For the right-hand condition, we know that (by precondition) - // get_loc1(idx.first) < get_loc2(idx.second + 1) - // - // and therefore that - // get_loc1(idx.first) < get_loc2(idx.second + 2), - // - // so this inequality is all that remains to be checked. 
- // - if (idx.second < Ny - 1 - && get_loc2(idx.second + 1) < get_loc1(idx.first + 1)) { - idx.second++; - } - // Could not advance the second segment above: advance the first instead, - // and the second as well if they share an endpoint - else { - if (idx.second < Ny - 1 - && get_loc1(idx.first + 1) == get_loc2(idx.second + 1)) { - idx.second++; - } - idx.first++; - } - -#if !NDEBUG - // Verify postcondition - if (!(at_end() || intervals_overlap(idx.first, idx.second))) { - throw std::logic_error("Iterator postcondition not satisfied: " - "current intervals do not overlap (not at end)"); - } -#endif - - return *this; - } - - iterator& operator++(int) { - iterator res = *this; - operator++(); - return res; - } - - friend bool operator==(const iterator& lhs, const iterator& rhs) { - return lhs.idx == rhs.idx; - } - - friend bool operator!=(const iterator& lhs, const iterator& rhs) { - return !(lhs == rhs); - } - }; - - iterator begin() { return iterator(this); }; - iterator end() { return iterator(this).advance_to_end(); }; -}; - -#endif // OVERLAPPING_SEGMENTS_H From 72de9eafb26705389fcfaff450cf0aae3b3c279b Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Wed, 22 Apr 2020 21:05:43 +0100 Subject: [PATCH 45/84] Use NumericVectors in tests --- src/test_emd_fast_smooth.cpp | 49 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/test_emd_fast_smooth.cpp b/src/test_emd_fast_smooth.cpp index ac0a65b7..8d9f453a 100644 --- a/src/test_emd_fast_smooth.cpp +++ b/src/test_emd_fast_smooth.cpp @@ -12,7 +12,6 @@ // header file. 
#include "fastSmoothV2.h" #include "emd_fast_no_smoothing.h" -#include "interleaved_iterator.h" #include #include @@ -137,72 +136,72 @@ void runIntervalOverlapTest(Container1T& actual, Container2T& expected) context("emd_fast_smooth overlapping interval iterator") { test_that("emd_fast_smooth overlapping interval iterator") { { - std::vector xs {1.0, 3.0, 5.0}; - std::vector ys {2.0, 4.0, 6.0}; - OverlappingSegments > actual(xs, ys); + NumericVector xs {1.0, 3.0, 5.0}; + NumericVector ys {2.0, 4.0, 6.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {0, -1}, {0, 0}, {1, 0}, {1, 1}, {2, 1}, {2, 2}}; runIntervalOverlapTest(actual, expected); } { - std::vector xs {1.0, 3.0}; - std::vector ys {4.0, 6.0, 8.0}; - OverlappingSegments > actual(xs, ys); + NumericVector xs {1.0, 3.0}; + NumericVector ys {4.0, 6.0, 8.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {0, -1}, {1, -1}, {1, 0}, {1, 1}, {1, 2}}; runIntervalOverlapTest(actual, expected); } { - std::vector xs {5.0, 5.5}; - std::vector ys {4.0, 6.0, 8.0}; - OverlappingSegments > actual(xs, ys); + NumericVector xs {5.0, 5.5}; + NumericVector ys {4.0, 6.0, 8.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {-1, 0}, {0, 0}, {1, 0}, {1, 1}, {1, 2}}; runIntervalOverlapTest(actual, expected); } { - std::vector xs {1.0, 2.0}; - std::vector ys {1.0, 3.0}; - OverlappingSegments > actual(xs, ys); + NumericVector xs {1.0, 2.0}; + NumericVector ys {1.0, 3.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {0, 0}, {1, 0}, {1, 1}}; runIntervalOverlapTest(actual, expected); } { - std::vector xs {1.0, 3.0}; - std::vector ys {1.0, 2.0}; - OverlappingSegments > actual(xs, ys); + NumericVector xs {1.0, 3.0}; + NumericVector ys {1.0, 2.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {0, 0}, {0, 1}, {1, 1}}; runIntervalOverlapTest(actual, expected); } { - std::vector xs {1.0, 2.0}; - std::vector ys {1.5, 2.0, 3.0}; - OverlappingSegments > actual(xs, 
ys); + NumericVector xs {1.0, 2.0}; + NumericVector ys {1.5, 2.0, 3.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {0, -1}, {0, 0}, {1, 1}, {1, 2}}; runIntervalOverlapTest(actual, expected); } { - std::vector xs {1.0, 2.0}; - std::vector ys {1.0, 2.0}; - OverlappingSegments > actual(xs, ys); + NumericVector xs {1.0, 2.0}; + NumericVector ys {1.0, 2.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {0, 0}, {1, 1}}; runIntervalOverlapTest(actual, expected); } { - std::vector xs {1.0, 2.0}; - std::vector ys {1.5, 2.0}; - OverlappingSegments > actual(xs, ys); + NumericVector xs {1.0, 2.0}; + NumericVector ys {1.5, 2.0}; + OverlappingSegments actual(xs, ys); std::vector > expected { {0, -1}, {0, 0}, {1, 1}}; runIntervalOverlapTest(actual, expected); From 8e12200baef07c3955fe6deb1e920ecc24ecabda Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Wed, 22 Apr 2020 21:07:04 +0100 Subject: [PATCH 46/84] Use overlapping interval iterator in NetEmdSmoothV2 --- src/fastSmoothV2.cpp | 228 ++++++++++++++++++++++++++++--------------- 1 file changed, 147 insertions(+), 81 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 1b544a2d..90f02074 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -5,6 +5,7 @@ #include #include #include +#include "fastSmoothV2.h" using namespace Rcpp; @@ -168,6 +169,10 @@ double leftmost_segments(const NumericVector& loc1, curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); + std::cerr << "%% " << index << " : " << curSeg1Loc1 << " " << curSeg1Loc2 << " " << curSeg1Loc3 << " " + << curSeg1Val1 << " " << curSeg1Val2 << " " << curSeg2Loc1 << " " << curSeg2Loc2 + << " " << curSeg2Loc3 << " " << curSeg2Val1 << " " << curSeg2Val2 << std::endl; + if (curSeg2Loc1 > curSeg1Loc3) { break; } @@ -190,99 +195,160 @@ double leftmost_segments(const NumericVector& loc1, double NetEmdSmoothV2(NumericVector loc1, 
NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2) { - double res = 0; + // double res = 0; - // Hist 1 - double curSeg1Loc1; // Start of the gradient section in Seg1 - double curSeg1Loc2; // End of the gradient section in Seg1 - double curSeg1Loc3; // End of the flat section in Seg1 - double curSeg1Val1; // Start value in Seg1 - double curSeg1Val2; // End value in Seg1 + // // Hist 1 + // double curSeg1Loc1; // Start of the gradient section in Seg1 + // double curSeg1Loc2; // End of the gradient section in Seg1 + // double curSeg1Loc3; // End of the flat section in Seg1 + // double curSeg1Val1; // Start value in Seg1 + // double curSeg1Val2; // End value in Seg1 - // Hist 2 - double curSeg2Loc1; // Start of the gradient section in Seg2 - double curSeg2Loc2; // End of the gradient section in Seg2 - double curSeg2Loc3; // End of the flat section in Seg2 - double curSeg2Val1; // Start value in Seg2 - double curSeg2Val2; // End value in Seg2 + // // Hist 2 + // double curSeg2Loc1; // Start of the gradient section in Seg2 + // double curSeg2Loc2; // End of the gradient section in Seg2 + // double curSeg2Loc3; // End of the flat section in Seg2 + // double curSeg2Val1; // Start value in Seg2 + // double curSeg2Val2; // End value in Seg2 - // Starting index for the second histogram - double secondStart = 0; + // // Starting index for the second histogram + // double secondStart = 0; - // Smallest and largest location values - double maxLoc = std::max(loc1[loc1.size()-1] + binWidth1, - loc2[loc2.size()-1] + binWidth2); + // // Smallest and largest location values + // double maxLoc = std::max(loc1[loc1.size()-1] + binWidth1, + // loc2[loc2.size()-1] + binWidth2); - // warning area before loc2[0] is not well tested - // As values outside of the range appear to be zero + // // warning area before loc2[0] is not well tested + // // As values outside of the range appear to be zero - if (loc2[0] < loc1[0]) { - res += 
leftmost_segments(loc2, loc1, val2, val1, binWidth2, maxLoc); - } - else + // std::cerr << "===============\n"; + + // if (loc2[0] < loc1[0]) { + // res += leftmost_segments(loc2, loc1, val2, val1, binWidth2, maxLoc); + // } + // else + // { + // res += leftmost_segments(loc1, loc2, val1, val2, binWidth1, maxLoc); + // } + + // // Add both the overlapping sections and the non overlapping section on the right + // // Note we reiterate over the first few sections loc1 + // // Could store where we are upto from above to save time + // // Reset Val counter + // curSeg1Val2 = 0; + // for (int index1 = 0; index1 < loc1.size(); index1++) { + // // Get the three relevant locations + // // Start; end of linear section; end of flat section + // curSeg1Loc1 = loc1[index1]; + // curSeg1Loc2 = loc1[index1] + binWidth1; + // // could pull this check outside of the loop with final case not sure if worth it + // if (index1 == loc1.size() - 1) { + // curSeg1Loc3 = maxLoc; + // } + // else { + // curSeg1Loc3 = loc1[index1 + 1]; + // } + // // Update value to the start and end of the current section + // curSeg1Val1 = curSeg1Val2; + // curSeg1Val2 = val1[index1]; + // // Setting up the previous value for the next loop + // // Could replace this loop with a while + // // but if so would need to be careful about overlaps + // if (secondStart == 0) { + // curSeg2Val2 = 0; + // } + // else { + // curSeg2Val2 = val2[secondStart - 1]; + // } + // for (int index2 = secondStart; index2 < loc2.size(); index2++) { + // // Construct 3 sections for second seg. 
+ // curSeg2Loc1 = loc2[index2]; + // curSeg2Loc2 = loc2[index2] + binWidth2; + // if (index2 == loc2.size() - 1) { + // curSeg2Loc3 = maxLoc; + // } + // else { + // curSeg2Loc3 = loc2[index2 + 1]; + // } + // //update values + // curSeg2Val1 = curSeg2Val2; + // curSeg2Val2 = val2[index2]; + // // If this section is behind Seg1 + // // Do not consider again + // if (curSeg2Loc3 < curSeg1Loc1) { + // secondStart = index2 + 1; + // continue; + // } + // // If current Seg2 is beyond Seg1 break out of loop + // res += get_double_segment_constrained( + // curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, + // curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); + + // std::cerr << "%% " << index1 << " " << index2 << " : " << curSeg1Loc1 << " " << curSeg1Loc2 << " " << curSeg1Loc3 << " " + // << curSeg1Val1 << " " << curSeg1Val2 << " " << curSeg2Loc1 << " " << curSeg2Loc2 + // << " " << curSeg2Loc3 << " " << curSeg2Val1 << " " << curSeg2Val2 << std::endl; + + // if (curSeg2Loc3 > curSeg1Loc3) { + // break; + // } + // } + // } + + // std::cerr << "---------------\n"; + { - res += leftmost_segments(loc1, loc2, val1, val2, binWidth1, maxLoc); - } + OverlappingSegments segs(loc1, loc2, binWidth1, binWidth2); + + double res = 0.0; + + for (OverlappingSegments::iterator it = segs.begin(), endIt = segs.end(); + it != endIt; ++it) + { + // Hist 1 + // Start of the gradient section in Seg1 + double curSeg1Loc1 = it.get_loc1(it->first); + + // End of the gradient section in Seg1 + double curSeg1Loc2 = (it->first < 0) ? curSeg1Loc1 : (curSeg1Loc1 + binWidth1); + + // End of the flat section in Seg1 + double curSeg1Loc3 = it.get_loc1(it->first + 1); + + // Start value in Seg1 + double curSeg1Val1 = (it->first <= 0) ? 0.0 : val1[it->first - 1]; + + // End value in Seg1 + double curSeg1Val2 = (it->first < 0) ? 
0.0 : val1[it->first]; + + // Hist 2 + // Start of the gradient section in Seg2 + double curSeg2Loc1 = it.get_loc2(it->second); + + // End of the gradient section in Seg2 + double curSeg2Loc2 = (it->second < 0) ? curSeg2Loc1 : (curSeg2Loc1 + binWidth2); + + // End of the flat section in Seg2 + double curSeg2Loc3 = it.get_loc2(it->second + 1); + + // Start value in Seg2 + double curSeg2Val1 = (it->second <= 0) ? 0.0 : val2[it->second - 1]; + + // End value in Seg2 + double curSeg2Val2 = (it->second < 0) ? 0.0 : val2[it->second]; + + // std::cerr << "%% " << it->first << " " << it->second << " : " << curSeg1Loc1 << " " << curSeg1Loc2 << " " << curSeg1Loc3 << " " + // << curSeg1Val1 << " " << curSeg1Val2 << " " << curSeg2Loc1 << " " << curSeg2Loc2 + // << " " << curSeg2Loc3 << " " << curSeg2Val1 << " " << curSeg2Val2 << std::endl; - // Add both the overlapping sections and the non overlapping section on the right - // Note we reiterate over the first few sections loc1 - // Could store where we are upto from above to save time - // Reset Val counter - curSeg1Val2 = 0; - for (int index1 = 0; index1 < loc1.size(); index1++) { - // Get the three relevant locations - // Start; end of linear section; end of flat section - curSeg1Loc1 = loc1[index1]; - curSeg1Loc2 = loc1[index1] + binWidth1; - // could pull this check outside of the loop with final case not sure if worth it - if (index1 == loc1.size() - 1) { - curSeg1Loc3 = maxLoc; - } - else { - curSeg1Loc3 = loc1[index1 + 1]; - } - // Update value to the start and end of the current section - curSeg1Val1 = curSeg1Val2; - curSeg1Val2 = val1[index1]; - // Setting up the previous value for the next loop - // Could replace this loop with a while - // but if so would need to be careful about overlaps - if (secondStart == 0) { - curSeg2Val2 = 0; - } - else { - curSeg2Val2 = val2[secondStart - 1]; - } - for (int index2 = secondStart; index2 < loc2.size(); index2++) { - // Construct 3 sections for second seg. 
- curSeg2Loc1 = loc2[index2]; - curSeg2Loc2 = loc2[index2] + binWidth2; - if (index2 == loc2.size() - 1) { - curSeg2Loc3 = maxLoc; - } - else { - curSeg2Loc3 = loc2[index2 + 1]; - } - //update values - curSeg2Val1 = curSeg2Val2; - curSeg2Val2 = val2[index2]; - // If this section is behind Seg1 - // Do not consider again - if (curSeg2Loc3 < curSeg1Loc1) { - secondStart = index2 + 1; - continue; - } - // If current Seg2 is beyond Seg1 break out of loop res += get_double_segment_constrained( curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, - curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); - - if (curSeg2Loc3 > curSeg1Loc3) { - break; - } + curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); } + + return res; } - return res; + + // std::cerr << "===============\n"; } From 857c91ce2b459467b850716fd00b09c7a66d1b6b Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Thu, 23 Apr 2020 11:04:43 +0100 Subject: [PATCH 47/84] Small refactor of OverlappingSegments, and use for NetEmdSmoothV2 --- src/fastSmoothV2.cpp | 289 ++++++++++--------------------------------- src/fastSmoothV2.h | 189 ++++++++++++++-------------- 2 files changed, 157 insertions(+), 321 deletions(-) diff --git a/src/fastSmoothV2.cpp b/src/fastSmoothV2.cpp index 90f02074..0c2ffb5c 100644 --- a/src/fastSmoothV2.cpp +++ b/src/fastSmoothV2.cpp @@ -10,21 +10,21 @@ using namespace Rcpp; double bowtie_area(double length, double val1_start, double val1_end, - double val2_start, double val2_end) + double val2_start, double val2_end) { double midPoint = (val1_start - val2_start) / ((val2_end - val2_start) - (val1_end - val1_start)); const double midValue = val1_start + midPoint * (val1_end - val1_start); - + midPoint = midPoint * length; - + double topTriangle = 0.5 * midPoint * (midValue - val1_start); double topRectangle = midPoint * (val1_start - val2_start); double bottomTriangle = 0.5 * midPoint * (midValue - val2_start); - + double res = topTriangle + 
topRectangle - bottomTriangle; - + topTriangle = 0.5 * (length - midPoint) * (val2_end - midValue); topRectangle = 0; // midPoint*(val1_start-val2_start); bottomTriangle = 0.5 * (length - midPoint) * (val1_end - midValue); @@ -36,7 +36,7 @@ double bowtie_area(double length, double val1_start, double val1_end, // Compute the unsigned area between two line segments // assumes that val1_end > val1_start and val2_end > val2_start double get_segment(double start, double end, double val1_start, - double val1_end, double val2_start, double val2_end) + double val1_end, double val2_start, double val2_end) { const double length = end - start; @@ -49,7 +49,7 @@ double get_segment(double start, double end, double val1_start, bool both_differences_positive = val1_start > val2_start && val1_end >= val2_end; bool both_differences_negative = val1_start <= val2_start && val1_end <= val2_end; - + if (both_differences_positive || both_differences_negative) { // They are in the same order: no bowtie @@ -73,9 +73,9 @@ double get_segment(double start, double end, double val1_start, // cut down and compute segment double get_segment_constrained(double seg1L1, double seg1L2, - double seg2L1, double seg2L2, - double seg1V1, double seg1V2, - double seg2V1, double seg2V2) + double seg2L1, double seg2L2, + double seg1V1, double seg1V2, + double seg2V1, double seg2V2) { //We have a valid range double valStart1, valEnd1, valStart2, valEnd2; @@ -102,86 +102,30 @@ double get_double_segment_constrained( double seg2Loc1, double seg2Loc2, double seg2Loc3, double seg2Val1, double seg2Val2) { - double res = 0; - - // compare the linear section with the linear section - res += get_segment_constrained(seg1Loc1, seg1Loc2, seg2Loc1, seg2Loc2, - seg1Val1, seg1Val2, seg2Val1, seg2Val2); - - // compare the linear section with the flat section - // This could be easily special cased (saving ~1 if statements ). 
- res += get_segment_constrained(seg1Loc1, seg1Loc2, seg2Loc2, seg2Loc3, - seg1Val1, seg1Val2, seg2Val2, seg2Val2); - - // compare the flat section with the linear section - // This could be easily special cased (saving ~1 if statements ). - res += get_segment_constrained(seg1Loc2, seg1Loc3, seg2Loc1, seg2Loc2, - seg1Val2, seg1Val2, seg2Val1, seg2Val2); - - // compare the flat section with the flat section - // This could be easily special cased (saving ~2 if statements ). - res += get_segment_constrained(seg1Loc2, seg1Loc3, seg2Loc2, seg2Loc3, - seg1Val2, seg1Val2, seg2Val2, seg2Val2); - - return res; -} + double res = 0; + // compare the linear section with the linear section + res += get_segment_constrained(seg1Loc1, seg1Loc2, seg2Loc1, seg2Loc2, + seg1Val1, seg1Val2, seg2Val1, seg2Val2); -// Dealing with segments which are to the left of the region covered by both -// segs -double leftmost_segments(const NumericVector& loc1, - const NumericVector& loc2, - const NumericVector& val1, - const NumericVector& val2, - double binWidth1, - double maxLoc) -{ - double res = 0.0; + // compare the linear section with the flat section + // This could be easily special cased (saving ~1 if statements ). + res += get_segment_constrained(seg1Loc1, seg1Loc2, seg2Loc2, seg2Loc3, + seg1Val1, seg1Val2, seg2Val2, seg2Val2); - assert(loc1[0] < loc2[0]); - - // Fix the position of Seg2 and then interate over Seg1 until we have all - // of the segments of Seg1 before Seg2 starts. - - // are these all used? 
- - double curSeg2Loc1 = loc1[0]; - double curSeg2Loc2 = loc1[0]; - double curSeg2Loc3 = loc2[0]; - double curSeg2Val1 = 0; - double curSeg2Val2 = 0; - - // Set this value so we can update in the lopp - double curSeg1Val2 = 0; - for (int index = 0; index < loc1.size(); index++) { - double curSeg1Loc1 = loc1[index]; - double curSeg1Loc2 = loc1[index] + binWidth1; - double curSeg1Loc3; - if (index == loc1.size() - 1) { - curSeg1Loc3 = maxLoc; - } - else { - curSeg1Loc3 = loc1[index + 1]; - } - double curSeg1Val1 = curSeg1Val2; - curSeg1Val2 = val1[index]; - res += get_double_segment_constrained( - curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, - curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); + // compare the flat section with the linear section + // This could be easily special cased (saving ~1 if statements ). + res += get_segment_constrained(seg1Loc2, seg1Loc3, seg2Loc1, seg2Loc2, + seg1Val2, seg1Val2, seg2Val1, seg2Val2); - std::cerr << "%% " << index << " : " << curSeg1Loc1 << " " << curSeg1Loc2 << " " << curSeg1Loc3 << " " - << curSeg1Val1 << " " << curSeg1Val2 << " " << curSeg2Loc1 << " " << curSeg2Loc2 - << " " << curSeg2Loc3 << " " << curSeg2Val1 << " " << curSeg2Val2 << std::endl; - - if (curSeg2Loc1 > curSeg1Loc3) { - break; - } - } + // compare the flat section with the flat section + // This could be easily special cased (saving ~2 if statements ). 
+ res += get_segment_constrained(seg1Loc2, seg1Loc3, seg2Loc2, seg2Loc3, + seg1Val2, seg1Val2, seg2Val2, seg2Val2); return res; } - //' @title //' Compute EMD ////' @@ -195,160 +139,55 @@ double leftmost_segments(const NumericVector& loc1, double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2) { - // double res = 0; - - // // Hist 1 - // double curSeg1Loc1; // Start of the gradient section in Seg1 - // double curSeg1Loc2; // End of the gradient section in Seg1 - // double curSeg1Loc3; // End of the flat section in Seg1 - // double curSeg1Val1; // Start value in Seg1 - // double curSeg1Val2; // End value in Seg1 - - // // Hist 2 - // double curSeg2Loc1; // Start of the gradient section in Seg2 - // double curSeg2Loc2; // End of the gradient section in Seg2 - // double curSeg2Loc3; // End of the flat section in Seg2 - // double curSeg2Val1; // Start value in Seg2 - // double curSeg2Val2; // End value in Seg2 - - - // // Starting index for the second histogram - // double secondStart = 0; - - // // Smallest and largest location values - // double maxLoc = std::max(loc1[loc1.size()-1] + binWidth1, - // loc2[loc2.size()-1] + binWidth2); - - - // // warning area before loc2[0] is not well tested - // // As values outside of the range appear to be zero - - // std::cerr << "===============\n"; - - // if (loc2[0] < loc1[0]) { - // res += leftmost_segments(loc2, loc1, val2, val1, binWidth2, maxLoc); - // } - // else - // { - // res += leftmost_segments(loc1, loc2, val1, val2, binWidth1, maxLoc); - // } - - // // Add both the overlapping sections and the non overlapping section on the right - // // Note we reiterate over the first few sections loc1 - // // Could store where we are upto from above to save time - // // Reset Val counter - // curSeg1Val2 = 0; - // for (int index1 = 0; index1 < loc1.size(); index1++) { - // // Get the three relevant locations - // // Start; end of linear section; 
end of flat section - // curSeg1Loc1 = loc1[index1]; - // curSeg1Loc2 = loc1[index1] + binWidth1; - // // could pull this check outside of the loop with final case not sure if worth it - // if (index1 == loc1.size() - 1) { - // curSeg1Loc3 = maxLoc; - // } - // else { - // curSeg1Loc3 = loc1[index1 + 1]; - // } - // // Update value to the start and end of the current section - // curSeg1Val1 = curSeg1Val2; - // curSeg1Val2 = val1[index1]; - // // Setting up the previous value for the next loop - // // Could replace this loop with a while - // // but if so would need to be careful about overlaps - // if (secondStart == 0) { - // curSeg2Val2 = 0; - // } - // else { - // curSeg2Val2 = val2[secondStart - 1]; - // } - // for (int index2 = secondStart; index2 < loc2.size(); index2++) { - // // Construct 3 sections for second seg. - // curSeg2Loc1 = loc2[index2]; - // curSeg2Loc2 = loc2[index2] + binWidth2; - // if (index2 == loc2.size() - 1) { - // curSeg2Loc3 = maxLoc; - // } - // else { - // curSeg2Loc3 = loc2[index2 + 1]; - // } - // //update values - // curSeg2Val1 = curSeg2Val2; - // curSeg2Val2 = val2[index2]; - // // If this section is behind Seg1 - // // Do not consider again - // if (curSeg2Loc3 < curSeg1Loc1) { - // secondStart = index2 + 1; - // continue; - // } - // // If current Seg2 is beyond Seg1 break out of loop - // res += get_double_segment_constrained( - // curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, - // curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); - - // std::cerr << "%% " << index1 << " " << index2 << " : " << curSeg1Loc1 << " " << curSeg1Loc2 << " " << curSeg1Loc3 << " " - // << curSeg1Val1 << " " << curSeg1Val2 << " " << curSeg2Loc1 << " " << curSeg2Loc2 - // << " " << curSeg2Loc3 << " " << curSeg2Val1 << " " << curSeg2Val2 << std::endl; - - // if (curSeg2Loc3 > curSeg1Loc3) { - // break; - // } - // } - // } - - // std::cerr << "---------------\n"; - - { - OverlappingSegments segs(loc1, loc2, binWidth1, 
binWidth2); - - double res = 0.0; - - for (OverlappingSegments::iterator it = segs.begin(), endIt = segs.end(); - it != endIt; ++it) - { - // Hist 1 - // Start of the gradient section in Seg1 - double curSeg1Loc1 = it.get_loc1(it->first); + OverlappingSegments segs(loc1, loc2, binWidth1, binWidth2); - // End of the gradient section in Seg1 - double curSeg1Loc2 = (it->first < 0) ? curSeg1Loc1 : (curSeg1Loc1 + binWidth1); - - // End of the flat section in Seg1 - double curSeg1Loc3 = it.get_loc1(it->first + 1); + double res = 0.0; - // Start value in Seg1 - double curSeg1Val1 = (it->first <= 0) ? 0.0 : val1[it->first - 1]; + for (OverlappingSegments::iterator it = segs.begin(), endIt = segs.end(); + it != endIt; ++it) + { + // The OverlappingSegments iterator returns pairs of the left + // indices of overlapping endpoints: it->first is the index into + // loc1, it->second is the index into loc2. When the smallest + // element in one of these vectors is larger than the current + // element of the other, an 'index' of -1 is returned. - // End value in Seg1 - double curSeg1Val2 = (it->first < 0) ? 0.0 : val1[it->first]; + // Hist 1 + // Start of the gradient section in Seg1 + double curSeg1Loc1 = segs.loc1_left(it->first); - // Hist 2 - // Start of the gradient section in Seg2 - double curSeg2Loc1 = it.get_loc2(it->second); + // End of the gradient section in Seg1 + double curSeg1Loc2 = segs.loc1_mid(it->first); - // End of the gradient section in Seg2 - double curSeg2Loc2 = (it->second < 0) ? curSeg2Loc1 : (curSeg2Loc1 + binWidth2); + // End of the flat section in Seg1 + double curSeg1Loc3 = segs.loc1_right(it->first); - // End of the flat section in Seg2 - double curSeg2Loc3 = it.get_loc2(it->second + 1); + // Start and end values in Seg1: val1 gives the values at *right* + // endpoints of the segments. A value of 0.0 is used before the + // first segment. + double curSeg1Val1 = (it->first > 0) ? val1[it->first - 1] : 0.0; + double curSeg1Val2 = (it->first >= 0) ? 
val1[it->first] : 0.0; - // Start value in Seg2 - double curSeg2Val1 = (it->second <= 0) ? 0.0 : val2[it->second - 1]; + // Hist 2 + // Start of the gradient section in Seg1 + double curSeg2Loc1 = segs.loc2_left(it->second); - // End value in Seg2 - double curSeg2Val2 = (it->second < 0) ? 0.0 : val2[it->second]; + // End of the gradient section in Seg1 + double curSeg2Loc2 = segs.loc2_mid(it->second); - // std::cerr << "%% " << it->first << " " << it->second << " : " << curSeg1Loc1 << " " << curSeg1Loc2 << " " << curSeg1Loc3 << " " - // << curSeg1Val1 << " " << curSeg1Val2 << " " << curSeg2Loc1 << " " << curSeg2Loc2 - // << " " << curSeg2Loc3 << " " << curSeg2Val1 << " " << curSeg2Val2 << std::endl; + // End of the flat section in Seg1 + double curSeg2Loc3 = segs.loc2_right(it->second); - res += get_double_segment_constrained( - curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, - curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); - } + // Start and end values in Seg2: val2 gives the values at *right* + // endpoints of the segments. A value of 0.0 is used before the + // first segment. + double curSeg2Val1 = (it->second > 0) ? val2[it->second - 1] : 0.0; + double curSeg2Val2 = (it->second >= 0) ? 
val2[it->second] : 0.0; - return res; + res += get_double_segment_constrained( + curSeg1Loc1, curSeg1Loc2, curSeg1Loc3, curSeg1Val1, curSeg1Val2, + curSeg2Loc1, curSeg2Loc2, curSeg2Loc3, curSeg2Val1, curSeg2Val2); } - // std::cerr << "===============\n"; + return res; } diff --git a/src/fastSmoothV2.h b/src/fastSmoothV2.h index 130f8b52..6f4036c1 100644 --- a/src/fastSmoothV2.h +++ b/src/fastSmoothV2.h @@ -9,8 +9,6 @@ #include #include #include -// #include -// #include #include #include #include @@ -21,38 +19,81 @@ using namespace Rcpp; class OverlappingSegments { - // references the two sequences + + // The two sequences of left-hand segment endpoints const NumericVector& loc1; const NumericVector& loc2; const double binWidth1, binWidth2; + // shorter names for loc1.size() and loc2.size() + const long N1, N2; + + // the minimum and maximum values over both sequences; + double minloc, maxloc; + public: OverlappingSegments(NumericVector& loc1_, NumericVector& loc2_, double binWidth1_ = 1.0, double binWidth2_ = 1.0) - : loc1(loc1_), loc2(loc2_), binWidth1(binWidth1_), binWidth2(binWidth2_) + : loc1(loc1_), loc2(loc2_), + binWidth1(binWidth1_), binWidth2(binWidth2_), + N1(loc1.size()), N2(loc2.size()) { - // check the requirement that loc1 and loc2 are nonempty and - // (strictly) sorted - - if (loc1.size() == 0 || loc2.size() == 0) + if (N1 == 0 || N2 == 0) throw std::invalid_argument("Input vectors must be nonempty"); - - for (int i = 0; i < loc1.size() - 1; i++) - if (loc1[i] > loc1[i + 1]) - throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); - - for (int i = 0; i < loc2.size() - 1; i++) - if (loc2[i] > loc2[i + 1]) - throw std::invalid_argument("Input vectors must be sorted in strict ascending order"); + + for (int i = 0; i < N1 - 1; i++) + if (loc1[i] > loc1[i + 1] + binWidth1) + throw std::invalid_argument( + "Elements of loc1 must be sorted in ascending order, " + "with elements separated by at least binWidth1"); + + for 
(int i = 0; i < N2 - 1; i++) + if (loc2[i] > loc2[i + 1] + binWidth2) + throw std::invalid_argument( + "Elements of loc2 must be sorted in ascending order, " + "with elements separated by at least binWidth2"); + + minloc = std::min(loc1[0], loc2[0]); + maxloc = std::max(loc1[N1 - 1] + binWidth1, loc2[N2 - 1] + binWidth2); } - class iterator { - const OverlappingSegments * const segs; + // left, mid and right locations of the segments, for loc1 and loc2 + double loc1_left(long i) const { + return (i >= 0) ? loc1[i] : minloc; + } - long Nx, Ny; + double loc1_mid(long i) const { + return (i >= 0) ? (loc1[i] + binWidth1) : minloc; + } - // the minimum and maximum contained values; - double minloc, maxloc; + double loc1_right(long i) const { + return (i + 1 < N1) ? loc1[i + 1] : maxloc; + } + + double loc2_left(long i) const { + return (i >= 0) ? loc2[i] : minloc; + } + + double loc2_mid(long i) const { + return (i >= 0) ? (loc2[i] + binWidth2) : minloc; + } + + double loc2_right(long i) const { + return (i + 1 < N2) ? loc2[i + 1] : maxloc; + } + + // Does interval i (from the first collection of segments) overlap + // interval j (from the second)? + bool intervals_overlap(long i, long j) const { + return (loc1_left(i) < loc2_right(j) && loc2_left(j) < loc1_right(i)); + } + + iterator begin() { return iterator(*this); }; + iterator end() { return iterator(*this).advance_to_end(); }; + + // OverlappingSegments iterator + class iterator { + const OverlappingSegments& segs; // the current iteration state std::pair idx; @@ -78,99 +119,59 @@ class OverlappingSegments { // other one. If both sequences start with the same value, the // iteration starts at (0,0). 
// - explicit iterator(OverlappingSegments *segs_) - : segs(segs_), Nx(segs_->loc1.size()), Ny(segs_->loc2.size()) - { - // The initial state of the iterator - - if (segs->loc1[0] < segs->loc2[0]) { - minloc = segs->loc1[0]; + explicit iterator(const OverlappingSegments& segs_) : segs(segs_) { + if (segs.loc1[0] < segs.loc2[0]) { idx.first = 0; idx.second = -1; } - else if (segs->loc1[0] == segs->loc2[0]) { - minloc = segs->loc1[0]; // == segs->loc2[0] + else if (segs.loc1[0] == segs.loc2[0]) { idx.first = 0; idx.second = 0; } else { - minloc = segs->loc2[0]; idx.first = -1; idx.second = 0; } - - // maxloc is used to signal the end of the current segment: - // since we consider only left-hand endpoints, the only - // requirement is that it compares greater than any real loc in - // either sequence - in reality it will be the largest loc plus - // the corresponding bin width - // maxloc = std::numeric_limits::infinity(); - maxloc = std::max(segs->loc1[segs->loc1.size() - 1] + segs->binWidth1, - segs->loc2[segs->loc2.size() - 1] + segs->binWidth2); } - double get_loc1(long i) const { - if (i < 0) return minloc; - else if (i >= Nx) return maxloc; - else return segs->loc1[i]; - } - - double get_loc2(long i) const { - if (i < 0) return minloc; - else if (i >= Ny) return maxloc; - else return segs->loc2[i]; - } - - // Does interval i (from the first collection of segments) overlap - // interval j (from the second)? - bool intervals_overlap(long i, long j) const { - return (get_loc1(i) < get_loc2(j + 1) && get_loc2(j) < get_loc1(i + 1)); - } - - bool at_end() const { return idx.first == Nx && idx.second == Ny - 1; } + // Is the current iterator at one-past-the-end? Equivalent to an + // equality comparison with segs.end(). 
+ bool at_end() const { return idx.first == segs.N1 && idx.second == segs.N2 - 1; } + // Update the current iterator to point to one-past-the-end iterator& advance_to_end() { - idx.first = Nx; - idx.second = Ny - 1; + idx.first = segs.N1; + idx.second = segs.N2 - 1; return *this; } - - value_type operator*() const { return idx; } - - const value_type *operator->() const { return &idx; } iterator& operator++() { - #if !NDEBUG // Verify precondition - if (!intervals_overlap(idx.first, idx.second)) { + if (!segs.intervals_overlap(idx.first, idx.second)) { throw std::logic_error("Iterator precondition not satisfied: " "current intervals do not overlap"); } #endif // Advance the second segment if it would still overlap the first - // - // The condition below is equivalent to - // idx.second < Ny - 1 && intervals_overlap(idx.first, idx.second + 1) - // - // For the right-hand condition, we know that (by precondition) - // get_loc1(idx.first) < get_loc2(idx.second + 1) // + // The condition below is equivalent to + // idx.second < N2 - 1 && intervals_overlap(idx.first, idx.second + 1) + // given that we know (by the precondition) that + // loc1_left(idx.first) < loc2_right(idx.second) // and therefore that - // get_loc1(idx.first) < get_loc2(idx.second + 2), - // - // so this inequality is all that remains to be checked. 
+ // loc1_left(idx.first) < loc2_right(idx.second + 1), // - if (idx.second < Ny - 1 - && get_loc2(idx.second + 1) < get_loc1(idx.first + 1)) { + if (idx.second < segs.N2 - 1 + && segs.loc2_left(idx.second + 1) < segs.loc1_right(idx.first)) { idx.second++; } // Could not advance the second segment above: advance the first instead, // and the second as well if they share an endpoint else { - if (idx.second < Ny - 1 - && get_loc1(idx.first + 1) == get_loc2(idx.second + 1)) { + if (idx.second < segs.N2 - 1 + && segs.loc2_left(idx.second + 1) == segs.loc1_right(idx.first)) { idx.second++; } idx.first++; @@ -178,7 +179,7 @@ class OverlappingSegments { #if !NDEBUG // Verify postcondition - if (!(at_end() || intervals_overlap(idx.first, idx.second))) { + if (!(at_end() || segs.intervals_overlap(idx.first, idx.second))) { throw std::logic_error("Iterator postcondition not satisfied: " "current intervals do not overlap (not at end)"); } @@ -193,6 +194,10 @@ class OverlappingSegments { return res; } + value_type operator*() const { return idx; } + + const value_type *operator->() const { return &idx; } + friend bool operator==(const iterator& lhs, const iterator& rhs) { return lhs.idx == rhs.idx; } @@ -201,20 +206,19 @@ class OverlappingSegments { return !(lhs == rhs); } }; - - iterator begin() { return iterator(this); }; - iterator end() { return iterator(this).advance_to_end(); }; }; double bowtie_area(double length, double val1_start, double val1_end, - double val2_start, double val2_end); + double val2_start, double val2_end); + double get_segment(double start, double end, double val1_start, - double val1_end, double val2_start, double val2_end); + double val1_end, double val2_start, double val2_end); + double get_segment_constrained(double seg1L1, double seg1L2, - double seg2L1, double seg2L2, - double seg1V1, double seg1V2, - double seg2V1, double seg2V2); + double seg2L1, double seg2L2, + double seg1V1, double seg1V2, + double seg2V1, double seg2V2); double 
get_double_segment_constrained( double seg1Loc1, double seg1Loc2, double seg1Loc3, @@ -222,13 +226,6 @@ double get_double_segment_constrained( double seg2Loc1, double seg2Loc2, double seg2Loc3, double seg2Val1, double seg2Val2); -inline double leftmost_segments(const NumericVector& loc1, - const NumericVector& loc2, - const NumericVector& val1, - const NumericVector& val2, - double binWidth1, - double maxLoc); - double NetEmdSmoothV2(NumericVector loc1, NumericVector val1, double binWidth1, NumericVector loc2, NumericVector val2, double binWidth2); From 17ef75e815c11966704eb0330eaf202b83c7f09e Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Thu, 23 Apr 2020 11:27:53 +0100 Subject: [PATCH 48/84] Small formatting changes --- src/fastSmoothV2.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/fastSmoothV2.h b/src/fastSmoothV2.h index 6f4036c1..98a9672b 100644 --- a/src/fastSmoothV2.h +++ b/src/fastSmoothV2.h @@ -119,7 +119,9 @@ class OverlappingSegments { // other one. If both sequences start with the same value, the // iteration starts at (0,0). // - explicit iterator(const OverlappingSegments& segs_) : segs(segs_) { + explicit iterator(const OverlappingSegments& segs_) + : segs(segs_) + { if (segs.loc1[0] < segs.loc2[0]) { idx.first = 0; idx.second = -1; @@ -136,7 +138,9 @@ class OverlappingSegments { // Is the current iterator at one-past-the-end? Equivalent to an // equality comparison with segs.end(). 
- bool at_end() const { return idx.first == segs.N1 && idx.second == segs.N2 - 1; } + bool at_end() const { + return idx.first == segs.N1 && idx.second == segs.N2 - 1; + } // Update the current iterator to point to one-past-the-end iterator& advance_to_end() { From a67c2cebaa66a3a87fb3ccc21eb87e9697f2c29c Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Thu, 23 Apr 2020 11:30:11 +0100 Subject: [PATCH 49/84] Move begin() and end() after iterator class (fix breaking build) --- src/fastSmoothV2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fastSmoothV2.h b/src/fastSmoothV2.h index 98a9672b..77e3d461 100644 --- a/src/fastSmoothV2.h +++ b/src/fastSmoothV2.h @@ -88,9 +88,6 @@ class OverlappingSegments { return (loc1_left(i) < loc2_right(j) && loc2_left(j) < loc1_right(i)); } - iterator begin() { return iterator(*this); }; - iterator end() { return iterator(*this).advance_to_end(); }; - // OverlappingSegments iterator class iterator { const OverlappingSegments& segs; @@ -210,6 +207,9 @@ class OverlappingSegments { return !(lhs == rhs); } }; + + iterator begin() { return iterator(*this); } + iterator end() { return iterator(*this).advance_to_end(); } }; From a59cc6f8ddadd4f6ab1b43181f571f6ddd0b62f7 Mon Sep 17 00:00:00 2001 From: leospinaf Date: Tue, 23 Jun 2020 10:49:05 +0100 Subject: [PATCH 50/84] test --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 43b3164f..26da5aaa 100755 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # Network Comparison -An R package implementing the Netdis and NetEMD alignment-free network comparison measures. +An R package implementing the Netdis and NetEMD alignment-free network comparison measures. + ### :warning: BETA: Package under construction (pre-release) :warning: Until this package hits release 1.0 anything can change with no notice. 
From 1fd6fb7df02feec9d597f430ed8681a081e54839 Mon Sep 17 00:00:00 2001 From: andeElliott Date: Wed, 18 May 2022 10:26:56 +0100 Subject: [PATCH 51/84] removed the flag for sse to test if this will work on M1 --- src/Makevars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makevars b/src/Makevars index 25761e11..07ae5f45 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,2 +1,2 @@ CXX_STD = CXX11 -PKG_CPPFLAGS += -fno-fast-math -msse2 -mfpmath=sse -mstackrealign +PKG_CPPFLAGS += -fno-fast-math -msse2 -mstackrealign From 66f9f8f42503cfed15d41311c253c80f5c681385 Mon Sep 17 00:00:00 2001 From: andeElliott Date: Wed, 18 May 2022 10:29:49 +0100 Subject: [PATCH 52/84] attempt2 to fix the M1 issues --- src/Makevars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makevars b/src/Makevars index 07ae5f45..abab4e6b 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,2 +1,2 @@ CXX_STD = CXX11 -PKG_CPPFLAGS += -fno-fast-math -msse2 -mstackrealign +PKG_CPPFLAGS += -fno-fast-math -mstackrealign From dcff9a3558a4dd6276aaad12c590031b40a8b28d Mon Sep 17 00:00:00 2001 From: andeElliott Date: Wed, 18 May 2022 10:43:19 +0100 Subject: [PATCH 53/84] adding arg back in to see if it is needed --- src/Makevars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makevars b/src/Makevars index abab4e6b..fe240994 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,2 +1,2 @@ CXX_STD = CXX11 -PKG_CPPFLAGS += -fno-fast-math -mstackrealign +PKG_CPPFLAGS += -fno-fast-math -msse2 -mstackrealign From 0a04e3bdacb5108071c9f445d5013f9bcf492cb4 Mon Sep 17 00:00:00 2001 From: andeElliott Date: Wed, 18 May 2022 11:13:10 +0100 Subject: [PATCH 54/84] updated Rcpp Exports --- src/RcppExports.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 933f6b71..3779ab6d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -5,6 +5,11 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM 
+Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // counts_from_observations NumericMatrix counts_from_observations(NumericMatrix features); RcppExport SEXP _netdist_counts_from_observations(SEXP featuresSEXP) { From 474d8952c4adceaee0d43a799d1397bff5b35f5e Mon Sep 17 00:00:00 2001 From: andeElliott Date: Wed, 18 May 2022 11:27:48 +0100 Subject: [PATCH 55/84] update namespace to test if functions will export --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index f428db48..ca0bca0e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,3 +79,4 @@ import(Rcpp) importFrom(Rcpp,evalCpp) importFrom(Rcpp,sourceCpp) useDynLib(netdist, .registration=TRUE) +exportPattern("^[[:alpha:]]+") From ebc0873766b7fb41a3f436e850dccdcd89a656b3 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Mon, 6 Jun 2022 21:07:39 +0100 Subject: [PATCH 56/84] setup linting actions --- .github/workflows/build.yml | 22 +- R/PlottingFunctions.R | 35 ++- R/data.R | 7 +- R/dhist.R | 12 +- R/emd.R | 3 +- R/graph_binning.R | 2 +- R/measures_net_dis.R | 342 ++++++++++++------------- R/measures_net_emd.R | 220 ++++++++-------- R/orca_interface.R | 7 +- tests/testthat/test_emd.R | 20 +- tests/testthat/test_measures_net_emd.R | 26 +- 11 files changed, 359 insertions(+), 337 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 46523f94..e38577d3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,7 +20,7 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'release'} # run coverage, linting, and update docs on this platform only - {os: ubuntu-latest, r: 'oldrel'} - {os: ubuntu-latest, r: 'devel'} - {os: macOS-latest, r: 'release'} @@ -50,7 +50,14 @@ jobs: with: extra-packages: | any::devtools + + - uses: r-lib/actions/setup-r-dependencies@v2 + if: ${{ (matrix.config.os == 'ubuntu-latest') && 
(matrix.config.r == 'release') }} + with: + extra-packages: | any::covr + any::lintr + any::styler - name: Document run: devtools::document() @@ -66,10 +73,21 @@ jobs: git pull --ff-only git push origin - - name: Check package + - name: Check style with styler + if: ${{ (matrix.config.os == 'ubuntu-latest') && (matrix.config.r == 'release') }} + run: styler::style_pkg(dry='fail') + shell: Rscript {0} + + - name: Check style with lintr + if: ${{ (matrix.config.os == 'ubuntu-latest') && (matrix.config.r == 'release') }} + run: lintr::lint_package() + shell: Rscript {0} + + - name: Check package with devtools run: devtools::check() shell: Rscript {0} - name: Check coverage + if: ${{ (matrix.config.os == 'ubuntu-latest') && (matrix.config.r == 'release') }} run: covr::codecov(quiet = FALSE) shell: Rscript {0} diff --git a/R/PlottingFunctions.R b/R/PlottingFunctions.R index e525dabc..b4f9dbf8 100644 --- a/R/PlottingFunctions.R +++ b/R/PlottingFunctions.R @@ -2,7 +2,7 @@ #' Heatmap of Netdis many-to-many comparisons #' #' Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. -#' +#' #' @param netdislist Default output of \code{netdis_many_to_many}. #' #' @param whatrow Selection of the row in \code{netdis_many_to_many$comp_spec} to be used for plotting. @@ -10,19 +10,19 @@ #' @param clustering_method Clustering method as allowed in the \code{pheatmap} function from the \code{pheatmap} package. The dendrogram will appear if \code{docluster} is TRUE (default). #' #' @param main Title of the plot. -#' +#' #' @param docluster controls the order of the rows and columns. If TRUE (default) the rows and columns will be reordered to create the dendrogram. If FALSE, then only the heatmap is drawn. -#' +#' #' @return Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. 
#' @export -netdis.plot <- function(netdislist,whatrow=c(1,2)[2],clustering_method="ward.D",main="Nedis",docluster=TRUE){ - adjmat <- cross_comp_to_matrix(measure = netdislist$netdis[whatrow,], cross_comparison_spec = netdislist$comp_spec) +netdis.plot <- function(netdislist, whatrow = c(1, 2)[2], clustering_method = "ward.D", main = "Nedis", docluster = TRUE) { + adjmat <- cross_comp_to_matrix(measure = netdislist$netdis[whatrow, ], cross_comparison_spec = netdislist$comp_spec) vnames <- rownames(adjmat) - - legend1 <- seq(min(adjmat),max(adjmat),length.out = 5) - levels1 <- round(legend1,digits = 2) - pheatmap::pheatmap(mat = as.dist(adjmat),cluster_rows = docluster,cluster_cols = docluster,clustering_method = clustering_method,angle_col=45,main = main,treeheight_row = 80,labels_row = vnames,labels_col = vnames,display_numbers = TRUE,legend_breaks = legend1,legend_labels = levels1) + + legend1 <- seq(min(adjmat), max(adjmat), length.out = 5) + levels1 <- round(legend1, digits = 2) + pheatmap::pheatmap(mat = as.dist(adjmat), cluster_rows = docluster, cluster_cols = docluster, clustering_method = clustering_method, angle_col = 45, main = main, treeheight_row = 80, labels_row = vnames, labels_col = vnames, display_numbers = TRUE, legend_breaks = legend1, legend_labels = levels1) } @@ -31,7 +31,7 @@ netdis.plot <- function(netdislist,whatrow=c(1,2)[2],clustering_method="ward.D", #' Heatmap of NetEmd many-to-many comparisons #' #' Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. -#' +#' #' @param netdislist Default output of \code{netdis_many_to_many}. #' #' @param whatrow Selection of the row in \code{netdis_many_to_many$comp_spec} to be used for plotting. @@ -39,18 +39,17 @@ netdis.plot <- function(netdislist,whatrow=c(1,2)[2],clustering_method="ward.D", #' @param clustering_method Clustering method as allowed in the \code{pheatmap} function from the \code{pheatmap} package. 
The dendrogram will appear if \code{docluster} is TRUE (default). #' #' @param main Title of the plot. -#' +#' #' @param docluster controls the order of the rows and columns. If TRUE (default) the rows and columns will be reordered to create the dendrogram. If FALSE, then only the heatmap is drawn. -#' +#' #' @return Provides a heat map and dendrogram for the network comparisons via \code{pheatmap}. #' @export -netemd.plot <- function(netemdlist,clustering_method="ward.D",main="NetEmd",docluster=TRUE){ +netemd.plot <- function(netemdlist, clustering_method = "ward.D", main = "NetEmd", docluster = TRUE) { adjmat <- cross_comp_to_matrix(measure = netemdlist$netemds, cross_comparison_spec = netemdlist$comp_spec) vnames <- rownames(adjmat) - - legend1 <- seq(min(adjmat),max(adjmat),length.out = 5) - levels1 <- round(legend1,digits = 2) - pheatmap::pheatmap(mat = as.dist(adjmat),cluster_rows = docluster,cluster_cols = docluster,clustering_method = clustering_method,angle_col=45,main = main,treeheight_row = 80,labels_row = vnames,labels_col = vnames,display_numbers = TRUE,legend_breaks = legend1,legend_labels = levels1) - + + legend1 <- seq(min(adjmat), max(adjmat), length.out = 5) + levels1 <- round(legend1, digits = 2) + pheatmap::pheatmap(mat = as.dist(adjmat), cluster_rows = docluster, cluster_cols = docluster, clustering_method = clustering_method, angle_col = 45, main = main, treeheight_row = 80, labels_row = vnames, labels_col = vnames, display_numbers = TRUE, legend_breaks = legend1, legend_labels = levels1) } diff --git a/R/data.R b/R/data.R index cf5c5dff..4753ed66 100644 --- a/R/data.R +++ b/R/data.R @@ -47,18 +47,17 @@ #' World trade networks from 1985–2014 -#' +#' #' The world trade data set consists of a small sample of world trade networks for the years 2001-2014, and pre-computed subgraph counts of a larger set of world trade networks (1985–2014). 
The world trade networks are based on the data set from [Feenstra et al., 2005] for the years 1962- 2000 and on the United Nations division COMTRADE [Division, 2015] for the years 2001-2014. #' #' \itemize{ #' \item wtnets: List of \code{igraph} objects providing the world trade networks from 2001–2014. #' \item Counts: Pre-computed graphlet counts for the world trade networks in the years 1985-2014. #' } -#' +#' #' @format A list of two elements. The first element, 'wtnets', is a list of \code{igraph} objects providing a small sample of world trade networks from 2001–2014. The second element, 'Counts', is a list of pre-computed subgraph counts of world trade networks in the years 1985-2014. #' @source \strong{World trade networks:}. United nations commodity trade statistics database (UN comtrade). http://comtrade.un.org/, 2015. #' @source \strong{Subgraph Counts:} Feenstra RC,Lipsey RE, Deng H, Ma AC, and Mo H. (2005) World trade flows: 1962-2000. Technical report, National Bureau of Economic Research. (See also https://cid.econ.ucdavis.edu/wix.html). 
-#' +#' #' @encoding UTF-8 "worldtradesub" - diff --git a/R/dhist.R b/R/dhist.R index a5261f76..862afccd 100644 --- a/R/dhist.R +++ b/R/dhist.R @@ -123,12 +123,12 @@ is_dhist <- function(x, fast_check = TRUE) { has_masses <- purrr::has_element(attr(x, "name"), "masses") # Require list with correct class and presence of 1D numeric vector named # elements "locations" and "masses" - return(has_class_attr - && purrr::is_list(x) - && has_locations - && has_masses - && is_numeric_vector_1d(x$locations) - && is_numeric_vector_1d(x$masses)) + return(has_class_attr && + purrr::is_list(x) && + has_locations && + has_masses && + is_numeric_vector_1d(x$locations) && + is_numeric_vector_1d(x$masses)) } #' Discrete histogram from observations (Pure R slow version) diff --git a/R/emd.R b/R/emd.R index 86575521..da99cb36 100644 --- a/R/emd.R +++ b/R/emd.R @@ -80,8 +80,7 @@ min_emd_optimise_fast <- function(dhist1, dhist2) { min_emd <- soln$objective min_offset <- soln$minimum return(list(min_emd = min_emd, min_offset = min_offset)) - } - else { + } else { # Fall back on other version if either dhist is smoothed return(min_emd_optimise(dhist1, dhist2)) } diff --git a/R/graph_binning.R b/R/graph_binning.R index 3f8aa21b..482fc524 100644 --- a/R/graph_binning.R +++ b/R/graph_binning.R @@ -10,7 +10,7 @@ binned_densities_adaptive <- function(densities, min_counts_per_interval, num_intervals) { - if( length(densities) < min_counts_per_interval) min_counts_per_interval <- length(densities) + if (length(densities) < min_counts_per_interval) min_counts_per_interval <- length(densities) breaks <- adaptive_breaks(densities, min_count = min_counts_per_interval, breaks = num_intervals diff --git a/R/measures_net_dis.R b/R/measures_net_dis.R index d1120d03..2b0bbc21 100644 --- a/R/measures_net_dis.R +++ b/R/measures_net_dis.R @@ -1,12 +1,12 @@ #' Netdis between two graphs #' #' Calculates the different variants of the network dissimilarity statistic Netdis between two graphs. 
The variants currently supported are Netdis using a gold-standard network, Netdis using no expecations (\code{ref_graph = 0}), and Netdis using a Geometric Poisson approximation for the expectation (\code{ref_graph = NULL}). -#' -#' +#' +#' #' @param graph_1 A simple graph object from the \code{igraph} package. \code{graph_1} can be set to \code{NULL} (default) if \code{graphlet_counts_1} is provided. If both \code{graph_1} and \code{graphlet_counts_1} are not \code{NULL}, then only \code{graphlet_counts_1} will be considered. #' #' @param graph_2 A simple graph object from the \code{igraph} package. \code{graph_2} can be set to \code{NULL} (default) if \code{graphlet_counts_2} is provided. If both \code{graph_2} and \code{graphlet_counts_2} are not \code{NULL}, then only \code{graphlet_counts_2} will be considered. -#' +#' #' @param graphlet_counts_1 Pre-generated graphlet counts for the first query #' graph. Matrix containing counts of each graphlet (columns) for #' each ego-network (rows) in the input graph. Columns are labelled with @@ -16,7 +16,7 @@ #' each ego network. (default: NULL). #' If the \code{graphlet_counts_1} argument is defined then #' \code{graph_1} will not be used. These counts can be obtained with \code{count_graphlets_ego}. -#' +#' #' #' @param graphlet_counts_2 Pre-generated graphlet counts for the second query #' graph. Matrix containing counts of each graphlet (columns) for @@ -35,7 +35,7 @@ #' expected counts are calculated for all query graphs. #' 3) NULL (Default) - Used for Netdis-GP, where the expected counts will be calculated based on the properties of the #' query graphs themselves. (Geometric-Poisson approximation). -#' +#' #' @param graphlet_counts_ref Pre-generated reference graphlet counts. #' Matrix containing counts of each graphlet (columns) for #' each ego-network (rows) in the reference graph. 
Columns are labelled with @@ -46,7 +46,7 @@ #' If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not #' be used. #' -#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 (default) and 5 are supported. +#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 (default) and 5 are supported. #' #' @param neighbourhood_size Ego network neighborhood size (default: 2). #' @@ -75,50 +75,50 @@ #' If \code{exp_counts_fn} is \code{NULL}, (default), it will apply #' either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the #' values of \code{ref_graph} and \code{graphlet_counts_ref}. -#' +#' #' @return Netdis statistics between graph_1 and graph_2 for graphlet sizes #' up to and including max_graphlet_size. #' #' @examples #' require(netdist) #' require(igraph) -#' #Set source directory for Virus PPI graph edge files stored in the netdist package. +#' # Set source directory for Virus PPI graph edge files stored in the netdist package. #' source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") #' # Load query graphs as igraph objects -#' graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"),format = "ncol") -#' graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"),format = "ncol") -#' -#' #Netdis variant using the Geometric Poisson approximation to remove the background expectation of each network. -#' netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = NULL) #This option will focus on detecting more general and global discrepancies between the ego-network structures. -#' -#' #Comparing the networks via their observed ego counts without centering them (equivalent to using expectation equal to zero). This option, will focus on detecting small discrepancies. 
-#' netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = 0) -#' +#' graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") +#' graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") +#' +#' # Netdis variant using the Geometric Poisson approximation to remove the background expectation of each network. +#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) # This option will focus on detecting more general and global discrepancies between the ego-network structures. +#' +#' # Comparing the networks via their observed ego counts without centering them (equivalent to using expectation equal to zero). This option will focus on detecting small discrepancies. +#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = 0) +#' #' # Example of the use of netdis with a reference graph.This option will focus on detecting discrepancies between the networks relative to the ego-network structure of the reference network / gold-standard. -#' # Two lattice networks of different sizes are used for this example. -#' goldstd_1 <- graph.lattice(c(8,8)) #A reference net -#' goldstd_2 <- graph.lattice(c(44,44)) #A reference net -#' -#' netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_1) -#' netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_2) -#' -#' -#' #Providing pre-calculated subgraph counts. -#' -#' props_1 <- count_graphlets_ego(graph = graph_1) -#' props_2 <- count_graphlets_ego(graph = graph_2) -#' props_goldstd_1 <- count_graphlets_ego(graph = goldstd_1) -#' props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) -#' -#' #Netdis Geometric-Poisson. -#' netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, ref_graph = NULL) -#' -#' #Netdis Zero Expectation.
-#' netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, ref_graph = 0) -#' -#' #Netdis using gold-standard network -#' netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, graphlet_counts_ref = props_goldstd_1) -#' netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, graphlet_counts_ref = props_goldstd_2) +#' # Two lattice networks of different sizes are used for this example. +#' goldstd_1 <- graph.lattice(c(8, 8)) # A reference net +#' goldstd_2 <- graph.lattice(c(44, 44)) # A reference net +#' +#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_1) +#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_2) +#' +#' +#' # Providing pre-calculated subgraph counts. +#' +#' props_1 <- count_graphlets_ego(graph = graph_1) +#' props_2 <- count_graphlets_ego(graph = graph_2) +#' props_goldstd_1 <- count_graphlets_ego(graph = goldstd_1) +#' props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) +#' +#' # Netdis Geometric-Poisson. +#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = NULL) +#' +#' # Netdis Zero Expectation. 
+#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = 0) +#' +#' # Netdis using gold-standard network +#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_1) +#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_2) #' @export netdis_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, @@ -132,8 +132,8 @@ netdis_one_to_one <- function(graph_1 = NULL, exp_counts_fn = NULL, graphlet_counts_1 = NULL, graphlet_counts_2 = NULL, - graphlet_counts_ref= NULL) { - + graphlet_counts_ref = NULL) { + ## ------------------------------------------------------------------------ # Check arguments if (is.null(graph_1) && is.null(graphlet_counts_1)) { @@ -145,7 +145,7 @@ netdis_one_to_one <- function(graph_1 = NULL, ## ------------------------------------------------------------------------ # Generate graphlet counts and bundle them into named list with format needed # for netdis_many_to_many. 
- + if (is.null(graphlet_counts_1)) { graphlet_counts_1 <- count_graphlets_ego( graph_1, @@ -157,7 +157,7 @@ netdis_one_to_one <- function(graph_1 = NULL, ) } rm(graph_1) - + if (is.null(graphlet_counts_2)) { graphlet_counts_2 <- count_graphlets_ego( graph_2, @@ -169,13 +169,13 @@ netdis_one_to_one <- function(graph_1 = NULL, ) } rm(graph_2) - + graphlet_counts <- list( graph_1 = graphlet_counts_1, graph_2 = graphlet_counts_2 ) - - if(!is.null(ref_graph)){ + + if (!is.null(ref_graph)) { if (!is.numeric(ref_graph) && is.null(graphlet_counts_ref)) { graphlet_counts_ref <- count_graphlets_ego( ref_graph, @@ -203,7 +203,7 @@ netdis_one_to_one <- function(graph_1 = NULL, graphlet_counts = graphlet_counts, graphlet_counts_ref = graphlet_counts_ref ) - + ## ------------------------------------------------------------------------ # extract netdis statistics from list returned by netdis_many_to_many result$netdis[, 1] @@ -226,7 +226,7 @@ netdis_one_to_one <- function(graph_1 = NULL, #' 3) NULL - Expected counts will be calculated based on the properties of the #' query graphs themselves. #' -#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 and 5 are supported. +#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 and 5 are supported. #' #' @param neighbourhood_size Ego network neighbourhood size. #' @@ -255,7 +255,7 @@ netdis_one_to_one <- function(graph_1 = NULL, #' If \code{exp_counts_fn} is \code{NULL}, (default), it will apply #' either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the #' values of \code{ref_graph} and \code{graphlet_counts_ref}. -#' +#' #' #' @param graphlet_counts_1 Pre-generated graphlet counts for the first query #' graph. If the \code{graphlet_counts_1} argument is defined then @@ -268,7 +268,7 @@ netdis_one_to_one <- function(graph_1 = NULL, #' @param graphlet_counts_ref Pre-generated reference graphlet counts. 
If the #' \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not #' be used. -#' +#' #' @return Netdis statistics between graph_1 and graph_2 for graphlet sizes #' up to and including max_graphlet_size #' @export @@ -284,7 +284,7 @@ netdis_one_to_many <- function(graph_1 = NULL, exp_counts_fn = NULL, graphlet_counts_1 = NULL, graphlet_counts_compare = NULL, - graphlet_counts_ref= NULL) { + graphlet_counts_ref = NULL) { ## ------------------------------------------------------------------------ # Check arguments if (is.null(graph_1) && is.null(graphlet_counts_1)) { @@ -293,11 +293,11 @@ netdis_one_to_many <- function(graph_1 = NULL, if (is.null(graphs_compare) && is.null(graphlet_counts_compare)) { stop("One of graph_2 and graphlet_counts_2 must be supplied.") } - + ## ------------------------------------------------------------------------ # Generate graphlet counts and bundle them into named list with format needed # for netdis_many_to_many. - + if (is.null(graphlet_counts_1)) { graphlet_counts_1 <- count_graphlets_ego( graph_1, @@ -309,7 +309,7 @@ netdis_one_to_many <- function(graph_1 = NULL, ) } rm(graph_1) - + if (is.null(graphlet_counts_compare)) { graphlet_counts_compare <- purrr::map( graphs_compare, @@ -322,13 +322,13 @@ netdis_one_to_many <- function(graph_1 = NULL, ) } rm(graphs_compare) - + graphlet_counts <- append(graphlet_counts_compare, - list(graph_1 = graphlet_counts_1), - after = 0 + list(graph_1 = graphlet_counts_1), + after = 0 ) - - if(!is.null(ref_graph)){ + + if (!is.null(ref_graph)) { if (!is.numeric(ref_graph) && is.null(graphlet_counts_ref)) { graphlet_counts_ref <- count_graphlets_ego( ref_graph, @@ -341,7 +341,7 @@ netdis_one_to_many <- function(graph_1 = NULL, ref_graph <- NULL } } - + ## ------------------------------------------------------------------------ # calculate netdis result <- netdis_many_to_many( @@ -358,7 +358,7 @@ netdis_one_to_many <- function(graph_1 = NULL, graphlet_counts = graphlet_counts, 
graphlet_counts_ref = graphlet_counts_ref ) - + ## ------------------------------------------------------------------------ # restructure netdis_many_to_many output colnames(result$netdis) <- result$comp_spec$name_b @@ -384,7 +384,7 @@ netdis_one_to_many <- function(graph_1 = NULL, #' Can be "many-to-many" (all pairwise combinations) or "one-to-many" #' (compare first graph in graphs to all other graphs.) #' -#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 (default) and 5 are supported. +#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 (default) and 5 are supported. #' #' @param neighbourhood_size Ego network neighbourhood size (default 2). #' @@ -393,7 +393,7 @@ netdis_one_to_many <- function(graph_1 = NULL, #' #' @param min_ego_edges Filter ego networks which have fewer #' than min_ego_edges edges (default 1). -#' +#' #' @param binning_fn Function used to bin ego network densities. Takes edge \code{densities} #' as its single argument, and returns a named list including, the input \code{densities}, the resulting bin \code{breaks} (vector of density bin limits), and the vector \code{interval_indexes} which states to what bin each of the individual elements in \code{densities} belongs to. #' ego network). If \code{NULL}, then the method \code{binned_densities_adaptive} with @@ -423,15 +423,15 @@ netdis_one_to_many <- function(graph_1 = NULL, #' additional column labelled "N" including the node count for #' each ego network. #' -#' @param graphlet_counts_ref Pre-generated reference graphlet counts (default: NULL). Matrix containing counts +#' @param graphlet_counts_ref Pre-generated reference graphlet counts (default: NULL). Matrix containing counts #' of each graphlet (columns) for each ego-network (rows) in the input graph. Columns are labelled with #' graphlet IDs and rows are labelled with the ID of the central node in each #' ego-network. 
As well as graphlet counts, each matrix must contain an #' additional column labelled "N" including the node count for -#' each ego network. +#' each ego network. #' If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not #' be used. -#' +#' #' @return Netdis statistics between query graphs for graphlet sizes #' up to and including max_graphlet_size. #' @@ -448,14 +448,14 @@ netdis_many_to_many <- function(graphs = NULL, exp_counts_fn = NULL, graphlet_counts = NULL, graphlet_counts_ref = NULL) { - + ## ------------------------------------------------------------------------ # Check arguments and set functions appropriately if (is.null(graphs) && is.null(graphlet_counts)) { stop("One of graphs and graphlet_counts must be supplied.") } - - + + # Set default binning_fn if none supplied if (is.null(binning_fn)) { binning_fn <- purrr::partial( @@ -464,7 +464,7 @@ netdis_many_to_many <- function(graphs = NULL, num_intervals = 100 ) } - + # If no ref_graph supplied, default to geometric poisson unless user-defined # functions have been provided. if (is.null(ref_graph) && is.null(graphlet_counts_ref)) { @@ -476,9 +476,9 @@ netdis_many_to_many <- function(graphs = NULL, netdis_expected_counts, scale_fn = NULL ) - } - # If a ref_graph value supplied (including a constant), default to approach - # from original netdis paper, unless user-defined functions provided. + } + # If a ref_graph value supplied (including a constant), default to approach + # from original netdis paper, unless user-defined functions provided. } else { if (is.null(bin_counts_fn)) { bin_counts_fn <- purrr::partial( @@ -492,9 +492,9 @@ netdis_many_to_many <- function(graphs = NULL, netdis_expected_counts, scale_fn = count_graphlet_tuples ) - } + } } - + ## ------------------------------------------------------------------------ # Generate ego networks and count graphlets for query graphs. # But if graphlet counts have already been provided we can skip this step. 
@@ -510,7 +510,7 @@ netdis_many_to_many <- function(graphs = NULL, ) } rm(graphs) - + ## ------------------------------------------------------------------------ # Centred counts # If there are no graphlet_counts_ref, and a number has been passed as ref_graph, treat it as a constant expected @@ -526,12 +526,12 @@ netdis_many_to_many <- function(graphs = NULL, exp_counts_fn = NULL, max_graphlet_size = max_graphlet_size ) - + ## ------------------------------------------------------------------------ # If there are no graphlet_counts_ref, and If a reference graph passed, use it to calculate expected counts for all # query graphs. } else if (!is.null(ref_graph) || !is.null(graphlet_counts_ref)) { - + # Generate ego networks and calculate graphlet counts # But if graphlet_counts_ref provided can skip this step if (is.null(graphlet_counts_ref)) { @@ -545,22 +545,22 @@ netdis_many_to_many <- function(graphs = NULL, ) } rm(ref_graph) - + # Get ego-network densities densities_ref <- ego_network_density(graphlet_counts_ref) - + # bin ref ego-network densities binned_densities <- binning_fn(densities_ref) - + ref_ego_density_bins <- binned_densities$breaks - + # Average ref graphlet counts across density bins ref_binned_graphlet_counts <- bin_counts_fn( graphlet_counts_ref, binned_densities$interval_indexes, max_graphlet_size = max_graphlet_size ) - + # Calculate centred counts using ref graph centred_graphlet_counts <- purrr::map( graphlet_counts, @@ -572,7 +572,7 @@ netdis_many_to_many <- function(graphs = NULL, exp_counts_fn = exp_counts_fn, max_graphlet_size = max_graphlet_size ) - + ## ------------------------------------------------------------------------ # If no reference passed, calculate expected counts using query networks # themselves. Geometric-Poisson GP #This is the function that creates an error for a graph with three connected nodes. 
@@ -589,17 +589,17 @@ netdis_many_to_many <- function(graphs = NULL, ) } rm(graphlet_counts) - + ## ------------------------------------------------------------------------ # Sum centred graphlet counts across all ego networks sum_graphlet_counts <- lapply(centred_graphlet_counts, colSums) - + rm(centred_graphlet_counts) - + ## ------------------------------------------------------------------------ # Generate pairwise comparisons comp_spec <- cross_comparison_spec(sum_graphlet_counts, how = comparisons) - + ## ------------------------------------------------------------------------ # Calculate netdis statistics results <- parallel::mcmapply( @@ -614,7 +614,7 @@ netdis_many_to_many <- function(graphs = NULL, comp_spec$index_b, SIMPLIFY = TRUE ) - + list(netdis = results, comp_spec = comp_spec) } @@ -638,14 +638,14 @@ netdis <- function(centred_graphlet_count_vector_1, centred_graphlet_count_vecto ids <- graphlet_ids_for_size(graphlet_size) counts1 <- centred_graphlet_count_vector_1[ids] counts2 <- centred_graphlet_count_vector_2[ids] - + # Calculate normalising constant norm_const <- sum(counts1^2 / sqrt(counts1^2 + counts2^2), na.rm = TRUE) * sum(counts2^2 / sqrt(counts1^2 + counts2^2), na.rm = TRUE) # Calculate intermediate "netD" statistic that falls within range -1..1 netds2 <- (1 / sqrt(norm_const)) * sum((counts1 * counts2) / - sqrt(counts1^2 + counts2^2), na.rm = TRUE) + sqrt(counts1^2 + counts2^2), na.rm = TRUE) # Calculate corresponding "netd" Netdis statistic that falls within range 0..1 0.5 * (1 - netds2) } @@ -659,7 +659,7 @@ netdis <- function(centred_graphlet_count_vector_1, centred_graphlet_count_vecto #' @param centred_graphlet_count_vector_2 Centred Graphlet Counts vector for graph 2 #' @param max_graphlet_size max graphlet size to calculate Netdis for. #' The size of a graphlet is the number of nodes it contains. Netdis is -#' calculated for all graphlets from size 3 to size max_graphlet_size. Currently only 4 and 5 are supported. 
+#' calculated for all graphlets from size 3 to size max_graphlet_size. Currently only 4 and 5 are supported. #' @return Netdis statistic calculated using centred counts for graphlets of #' the specified size #' @export @@ -668,15 +668,15 @@ netdis_uptok <- function(centred_graphlet_count_vector_1, centred_graphlet_count if ((max_graphlet_size > 5) | (max_graphlet_size < 3)) { stop("max_graphlet_size must be 3, 4 or 5.") } - + netdis_statistics <- purrr::map(3:max_graphlet_size, - netdis, - centred_graphlet_count_vector_1 = centred_graphlet_count_vector_1, - centred_graphlet_count_vector_2 = centred_graphlet_count_vector_2 + netdis, + centred_graphlet_count_vector_1 = centred_graphlet_count_vector_1, + centred_graphlet_count_vector_2 = centred_graphlet_count_vector_2 ) - + netdis_statistics <- simplify2array(netdis_statistics) - + names(netdis_statistics) <- sapply( "netdis", @@ -684,7 +684,7 @@ netdis_uptok <- function(centred_graphlet_count_vector_1, centred_graphlet_count 3:max_graphlet_size, sep = "" ) - + netdis_statistics } @@ -728,31 +728,30 @@ netdis_uptok <- function(centred_graphlet_count_vector_1, centred_graphlet_count #' @return graphlet_counts minus exp_graphlet_counts for graphlets up to size #' max_graphlet_size. #' @export -netdis_centred_graphlet_counts <- function( - graphlet_counts, - ref_ego_density_bins, - ref_binned_graphlet_counts, - binning_fn, - bin_counts_fn, - exp_counts_fn, - max_graphlet_size) { - +netdis_centred_graphlet_counts <- function(graphlet_counts, + ref_ego_density_bins, + ref_binned_graphlet_counts, + binning_fn, + bin_counts_fn, + exp_counts_fn, + max_graphlet_size) { + ## ------------------------------------------------------------------------ # If a number has been passed as ref_binned_graphlet_counts, treat it as a # constant expected counts value (e.g. if ref_binned_graphlet_counts = 0 # then no centring of counts). 
if (is.numeric(ref_binned_graphlet_counts) && - length(ref_binned_graphlet_counts) == 1) { + length(ref_binned_graphlet_counts) == 1) { exp_graphlet_counts <- netdis_const_expected_counts( graphlet_counts, const = ref_binned_graphlet_counts ) - + ## ------------------------------------------------------------------------ # If reference bins and counts passed, use them to calculate # expected counts } else if (!is.null(ref_ego_density_bins) && - !is.null(ref_binned_graphlet_counts)) { + !is.null(ref_binned_graphlet_counts)) { # Calculate expected graphlet counts (using ref # graph ego network density bins) exp_graphlet_counts <- exp_counts_fn( @@ -761,29 +760,29 @@ netdis_centred_graphlet_counts <- function( ref_binned_graphlet_counts, max_graphlet_size = max_graphlet_size ) - + ## ------------------------------------------------------------------------ # If NULL passed as ref bins and counts, calculate expected counts using # query network itself. This should be GP. } else if (is.null(ref_ego_density_bins) && - is.null(ref_binned_graphlet_counts)) { + is.null(ref_binned_graphlet_counts)) { # Get ego-network densities densities <- ego_network_density(graphlet_counts) - + # bin ref ego-network densities - binned_densities <- binning_fn(densities) - + binned_densities <- binning_fn(densities) + # extract bin breaks and indexes from binning results ego_density_bin_breaks <- binned_densities$breaks ego_density_bin_indexes <- binned_densities$interval_indexes - + # Calculate expected counts in each bin binned_graphlet_counts <- bin_counts_fn( graphlet_counts, ego_density_bin_indexes, max_graphlet_size = max_graphlet_size ) - + # Calculate expected graphlet counts for each ego network exp_graphlet_counts <- exp_counts_fn( graphlet_counts, @@ -791,7 +790,7 @@ netdis_centred_graphlet_counts <- function( binned_graphlet_counts, max_graphlet_size = max_graphlet_size ) - + ## ------------------------------------------------------------------------ # Invalid combination of 
ref_ego_density_bins and ref_binned_graphlet_counts } else { @@ -803,7 +802,7 @@ netdis_centred_graphlet_counts <- function( - Constant numeric ref_binned_graphlet_counts: Use as constant expected counts value.") } - + ## ------------------------------------------------------------------------ # Subtract expected counts from actual graphlet counts netdis_subtract_exp_counts( @@ -824,16 +823,15 @@ netdis_centred_graphlet_counts <- function( #' nummber of ego networks (rows). #' @param max_graphlet_size Do the subtraction for graphlets up to this size. Currently only size 4 and 5 are supported. #' @export -netdis_subtract_exp_counts <- function( - graphlet_counts, - exp_graphlet_counts, - max_graphlet_size) { - +netdis_subtract_exp_counts <- function(graphlet_counts, + exp_graphlet_counts, + max_graphlet_size) { + # select columns for graphlets up to size max_graphlet_size id <- graphlet_key(max_graphlet_size)$id graphlet_counts <- graphlet_counts[, id] exp_graphlet_counts <- exp_graphlet_counts[, id] - + # Subtract expected counts from actual graphlet counts graphlet_counts - exp_graphlet_counts } @@ -856,14 +854,13 @@ netdis_subtract_exp_counts <- function( #' \code{density_binned_reference_counts} values will be multiplied by. #' #' @export -netdis_expected_counts <- function( - graphlet_counts, - density_breaks, - density_binned_reference_counts, - max_graphlet_size, - scale_fn = NULL) { - - +netdis_expected_counts <- function(graphlet_counts, + density_breaks, + density_binned_reference_counts, + max_graphlet_size, + scale_fn = NULL) { + + # Map over query graph ego-networks, using reference graph statistics to # calculate expected graphlet counts for each ego-network. 
expected_graphlet_counts <- t(apply( @@ -873,7 +870,7 @@ netdis_expected_counts <- function( density_binned_reference_counts = density_binned_reference_counts, scale_fn = scale_fn )) - + expected_graphlet_counts } @@ -899,15 +896,15 @@ netdis_expected_counts_ego <- function(graphlet_counts, density_breaks, density_binned_reference_counts, scale_fn = NULL) { - + # Look up average scaled graphlet counts for graphs of similar density # in the reference graph query_density <- density_from_counts(graphlet_counts) matched_density_index <- interval_index(query_density, density_breaks) - + matched_reference_counts <- density_binned_reference_counts[matched_density_index, ] - + if (!is.null(scale_fn)) { # Scale reference counts e.g. by multiplying the # reference count for each graphlet by the number @@ -943,17 +940,17 @@ mean_density_binned_graphlet_counts <- function(graphlet_counts, apply(graphlet_counts, MARGIN = 2, function(gc) { tapply(gc, INDEX = density_interval_indexes, FUN = agg_fn) }) - + # if only 1 bin (i.e. no binning) will be left with a 1D list. # convert it into a 2D list. 
if (is.null(dim(mean_density_binned_graphlet_counts))) { dim(mean_density_binned_graphlet_counts) <- c(1, length(mean_density_binned_graphlet_counts)) - + colnames(mean_density_binned_graphlet_counts) <- colnames(graphlet_counts) } - + mean_density_binned_graphlet_counts } @@ -996,13 +993,13 @@ density_binned_counts <- function(graphlet_counts, # by dividing by total number of k-tuples in # ego-network (where k is graphlet size) graphlet_counts <- scale_fn(graphlet_counts, - max_graphlet_size = max_graphlet_size + max_graphlet_size = max_graphlet_size ) } - + mean_density_binned_graphlet_counts(graphlet_counts, - density_interval_indexes, - agg_fn = agg_fn + density_interval_indexes, + agg_fn = agg_fn ) } @@ -1021,46 +1018,45 @@ exp_counts_bin_gp <- function(bin_idx, graphlet_counts, max_graphlet_size) { # extract ego networks belonging to input density bin index counts <- graphlet_counts[density_interval_indexes == bin_idx, ] - + # mean graphlet counts in this density bin means <- colMeans(counts) - + # subtract mean graphlet counts from actual graphlet counts mean_sub_counts <- sweep(counts, 2, means) - + # variance in graphlet counts across ego networks in this density bin Vd_sq <- colSums(mean_sub_counts^2) / (nrow(mean_sub_counts) - 1) - + # Dealing with zero variance HERE ind_zerovar <- (Vd_sq < .00000001) - if(sum(ind_zerovar) > 0) Vd_sq[ind_zerovar] <- 0.1 - + if (sum(ind_zerovar) > 0) Vd_sq[ind_zerovar] <- 0.1 + # GP theta parameter for each graphlet id in this density bin theta_d <- 2 * means / (Vd_sq + means) - + exp_counts_dk <- vector() for (k in 2:max_graphlet_size) { graphlet_idx <- graphlet_ids_for_size(k) - + # GP lambda parameter for graphlet size k in this density bin lambda_dk <- mean(2 * means[graphlet_idx]^2 / - (Vd_sq[graphlet_idx] + means[graphlet_idx]), - na.rm = TRUE + (Vd_sq[graphlet_idx] + means[graphlet_idx]), + na.rm = TRUE ) - + # Expected counts for graphlet size k in this density bin exp_counts_dk <- append( exp_counts_dk, lambda_dk 
/ theta_d[graphlet_idx] ) - } - + # Dealing with divisions by zero. ind <- is.na(exp_counts_dk) ind <- ind | is.infinite(exp_counts_dk) - if(sum(ind) > 0) exp_counts_dk[ind & ind_zerovar[-1]] <- 0 - + if (sum(ind) > 0) exp_counts_dk[ind & ind_zerovar[-1]] <- 0 + exp_counts_dk } @@ -1076,7 +1072,7 @@ exp_counts_bin_gp <- function(bin_idx, graphlet_counts, density_binned_counts_gp <- function(graphlet_counts, density_interval_indexes, max_graphlet_size) { - + # calculate expected counts for each density bin index nbins <- length(unique(density_interval_indexes)) expected_counts_bin <- t(sapply( @@ -1086,10 +1082,10 @@ density_binned_counts_gp <- function(graphlet_counts, density_interval_indexes = density_interval_indexes, max_graphlet_size = max_graphlet_size )) - + # remove NAs caused by bins with zero counts for a graphlet expected_counts_bin[is.nan(expected_counts_bin)] <- 0 - + expected_counts_bin } @@ -1137,10 +1133,10 @@ scale_graphlet_count <- function(graphlet_count, graphlet_tuples) { count_graphlet_tuples_ego <- function(graphlet_counts, max_graphlet_size) { graphlet_tuple_counts <- t(apply(graphlet_counts, 1, - count_graphlet_tuples, - max_graphlet_size = max_graphlet_size + count_graphlet_tuples, + max_graphlet_size = max_graphlet_size )) - + graphlet_tuple_counts } @@ -1177,12 +1173,12 @@ scale_graphlet_counts_ego <- function(graphlet_counts, graphlet_counts, max_graphlet_size = max_graphlet_size ) - + scaled_graphlet_counts <- scale_graphlet_count( graphlet_counts, ego_graphlet_tuples ) - + return(scaled_graphlet_counts) } @@ -1197,12 +1193,12 @@ scale_graphlet_counts_ego <- function(graphlet_counts, count_graphlet_tuples <- function(graph_graphlet_counts, max_graphlet_size) { # extract node counts from graph_graphlet_counts N <- graph_graphlet_counts["N"] - + graphlet_key <- graphlet_key(max_graphlet_size) graphlet_node_counts <- graphlet_key$node_count - + graphlet_tuple_counts <- choose(N, graphlet_node_counts) - + graphlet_tuple_counts <- 
stats::setNames( graphlet_tuple_counts, graphlet_key$id diff --git a/R/measures_net_emd.R b/R/measures_net_emd.R index 8246dfeb..ddcd1592 100755 --- a/R/measures_net_emd.R +++ b/R/measures_net_emd.R @@ -1,6 +1,6 @@ #' NetEMD Network Earth Mover's Distance between a pair of networks. #' -#' Calculates the network Earth Mover's Distance (EMD) between +#' Calculates the network Earth Mover's Distance (EMD) between #' two sets of network features. This is done by individually normalising the distribution #' of each feature so that they have unit mass and unit variance. Then the minimun EMD between the same pair of features (one for each corresponding graph) is calculated by considering all possible translations of the feature distributions. Finally the average over all features is reported. #' This is calculated as follows: @@ -39,42 +39,42 @@ #' the minimal EMD for each pair of histograms, \code{min_offsets}: the associated #' offsets giving the minimal EMD for each pair of histograms #' @examples -#' require(igraph) -#' graph_1 <- graph.lattice(c(8,8)) -#' graph_2 <- graph.lattice(c(44,44)) -#' netemd_one_to_one(graph_1=graph_1,graph_2=graph_2,feature_type="orbit",max_graphlet_size=5) -#' -#' #Providing a matrix of network features -#' props_a= count_orbits_per_node(graph = graph_1,max_graphlet_size = 5) -#' props_b= count_orbits_per_node(graph = graph_2,max_graphlet_size = 5) -#' -#' netemd_one_to_one(dhists_1=props_a, dhists_2=props_b,smoothing_window_width = 1) -#' -#' #Providing the network features as lists of dhist objects -#' dhists_1<- graph_features_to_histograms(props_a) -#' dhists_2<- graph_features_to_histograms(props_b) -#' -#' netemd_one_to_one(dhists_1=dhists_1, dhists_2=dhists_2) -#' -#' -#' # A variation of NetEmd: Using the Laplacian spectrum -#' #Laplacian -#' Lapg_1 <- igraph::laplacian_matrix(graph = graph_1,normalized = FALSE,sparse = FALSE) -#' Lapg_2 <- igraph::laplacian_matrix(graph = graph_2,normalized = FALSE,sparse = FALSE) -#' -#' 
#Normalized Laplacian -#' NLapg_1 <- igraph::laplacian_matrix(graph = graph_1,normalized = TRUE,sparse = FALSE) -#' NLapg_2 <- igraph::laplacian_matrix(graph = graph_2,normalized = TRUE,sparse = FALSE) -#' -#' #Spectra (This may take a couple of minutes). -#' props_1 <- cbind(L.Spectra= eigen(Lapg_1)$values, NL.Spectra= eigen(NLapg_1)$values) -#' props_2 <- cbind(L.Spectra= eigen(Lapg_2)$values, NL.Spectra= eigen(NLapg_2)$values) -#' -#' netemd_one_to_one(dhists_1 = props_1,dhists_2 = props_2,smoothing_window_width = 0)#Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. -#' +#' require(igraph) +#' graph_1 <- graph.lattice(c(8, 8)) +#' graph_2 <- graph.lattice(c(44, 44)) +#' netemd_one_to_one(graph_1 = graph_1, graph_2 = graph_2, feature_type = "orbit", max_graphlet_size = 5) +#' +#' # Providing a matrix of network features +#' props_a <- count_orbits_per_node(graph = graph_1, max_graphlet_size = 5) +#' props_b <- count_orbits_per_node(graph = graph_2, max_graphlet_size = 5) +#' +#' netemd_one_to_one(dhists_1 = props_a, dhists_2 = props_b, smoothing_window_width = 1) +#' +#' # Providing the network features as lists of dhist objects +#' dhists_1 <- graph_features_to_histograms(props_a) +#' dhists_2 <- graph_features_to_histograms(props_b) +#' +#' netemd_one_to_one(dhists_1 = dhists_1, dhists_2 = dhists_2) +#' +#' +#' # A variation of NetEmd: Using the Laplacian spectrum +#' # Laplacian +#' Lapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = FALSE, sparse = FALSE) +#' Lapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = FALSE, sparse = FALSE) +#' +#' # Normalized Laplacian +#' NLapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = TRUE, sparse = FALSE) +#' NLapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = TRUE, sparse = FALSE) +#' +#' # Spectra (This may take a couple of minutes). 
+#' props_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) +#' props_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) +#' +#' netemd_one_to_one(dhists_1 = props_1, dhists_2 = props_2, smoothing_window_width = 0) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +#' #' @export -netemd_one_to_one <- function(graph_1=NULL,graph_2=NULL,dhists_1=NULL, dhists_2=NULL, method = "optimise", - return_details = FALSE, smoothing_window_width = 0,feature_type="orbit",max_graphlet_size = 5,ego_neighbourhood_size = 0) { +netemd_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, dhists_1 = NULL, dhists_2 = NULL, method = "optimise", + return_details = FALSE, smoothing_window_width = 0, feature_type = "orbit", max_graphlet_size = 5, ego_neighbourhood_size = 0) { ## ------------------------------------------------------------------------ # Check arguments 1 if (!igraph::is.igraph(graph_1) & is.null(dhists_1)) { @@ -86,44 +86,50 @@ netemd_one_to_one <- function(graph_1=NULL,graph_2=NULL,dhists_1=NULL, dhists_2= ## ------------------------------------------------------------------------ # Check arguments 2 # If dhists_1 is a matrix of network features then transform them to dhist objects. 
- if(is.matrix(dhists_1)){ + if (is.matrix(dhists_1)) { dhists_1 <- graph_features_to_histograms(dhists_1) } - if(is.matrix(dhists_2)){ + if (is.matrix(dhists_2)) { dhists_2 <- graph_features_to_histograms(dhists_2) } ## ------------------------------------------------------------------------ # Check arguments 3 - #If input is graph then get graphlet counts - if(igraph::is.igraph(graph_1)){ - if(!is.null(dhists_1)){warning("dhists_1 will be calculated from graph_1.")} - dhists_1 <- gdd(graph = graph_1, feature_type = feature_type, - max_graphlet_size = max_graphlet_size, - ego_neighbourhood_size = ego_neighbourhood_size + # If input is graph then get graphlet counts + if (igraph::is.igraph(graph_1)) { + if (!is.null(dhists_1)) { + warning("dhists_1 will be calculated from graph_1.") + } + dhists_1 <- gdd( + graph = graph_1, feature_type = feature_type, + max_graphlet_size = max_graphlet_size, + ego_neighbourhood_size = ego_neighbourhood_size ) } - if(igraph::is.igraph(graph_2)){ - if(!is.null(dhists_2)){warning("dhists_2 will be calculated from graph_2.")} - dhists_2 <- gdd(graph = graph_2, feature_type = feature_type, - max_graphlet_size = max_graphlet_size, - ego_neighbourhood_size = ego_neighbourhood_size + if (igraph::is.igraph(graph_2)) { + if (!is.null(dhists_2)) { + warning("dhists_2 will be calculated from graph_2.") + } + dhists_2 <- gdd( + graph = graph_2, feature_type = feature_type, + max_graphlet_size = max_graphlet_size, + ego_neighbourhood_size = ego_neighbourhood_size ) } - - rm(graph_1,graph_2) + + rm(graph_1, graph_2) ## ------------------------------------------------------------------------ # Require either a pair of "dhist" discrete histograms or two lists of "dhist" # discrete histograms pair_of_dhist_lists <- all(purrr::map_lgl(dhists_1, is_dhist)) && all(purrr::map_lgl(dhists_2, is_dhist)) - + # If input is two lists of "dhist" discrete histograms, determine the minimum # EMD and associated offset for pairs of histograms taken from the same 
# position in each list if (pair_of_dhist_lists) { details <- purrr::map2(dhists_1, dhists_2, function(dhist1, dhist2) { netemd_single_pair(dhist1, dhist2, - method = method, - smoothing_window_width = smoothing_window_width + method = method, + smoothing_window_width = smoothing_window_width ) }) # Collect the minimum EMDs and associated offsets for all histogram pairs @@ -143,14 +149,14 @@ netemd_one_to_one <- function(graph_1=NULL,graph_2=NULL,dhists_1=NULL, dhists_2= } else { return(arithmetic_mean) } - } - else { + } else { # Wrap each member of a single pair of histograms is a list and recursively # call this net_emd function. This ensures they are treated the same. - return(netemd_one_to_one(dhists_1 = list(dhists_1), dhists_2 = list(dhists_2), - method = method, - return_details = return_details, - smoothing_window_width = smoothing_window_width + return(netemd_one_to_one( + dhists_1 = list(dhists_1), dhists_2 = list(dhists_2), + method = method, + return_details = return_details, + smoothing_window_width = smoothing_window_width )) } } @@ -195,10 +201,10 @@ netemd_one_to_one <- function(graph_1=NULL,graph_2=NULL,dhists_1=NULL, dhists_2= #' used to compute the NetEMD, \code{min_offsets}: the associated offsets giving #' the minimal EMD for each GDD #' @export -netemd_many_to_many<- function(graphs=NULL,dhists=NULL, method = "optimise", smoothing_window_width = 0, - return_details = FALSE, mc.cores = getOption("mc.cores", 2L),feature_type="orbit",max_graphlet_size = 5,ego_neighbourhood_size = 0) { - if(max_graphlet_size > 4 & mc.cores > 1) print(paste("This function will compute orbits of graphlets up to size 5 using ", mc.cores," cores. 
Depending on the density and size of the graphs, this may lead to a large compsumption of RAM.")) - +netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise", smoothing_window_width = 0, + return_details = FALSE, mc.cores = getOption("mc.cores", 2L), feature_type = "orbit", max_graphlet_size = 5, ego_neighbourhood_size = 0) { + if (max_graphlet_size > 4 & mc.cores > 1) print(paste("This function will compute orbits of graphlets up to size 5 using ", mc.cores, " cores. Depending on the density and size of the graphs, this may lead to a large compsumption of RAM.")) + # NOTE: mcapply only works on unix-like systems with system level forking # capability. This means it will work on Linux and OSX, but not Windows. # For now, we just revert to single threaded operation on Windows @@ -211,59 +217,63 @@ netemd_many_to_many<- function(graphs=NULL,dhists=NULL, method = "optimise", smo ## ------------------------------------------------------------------------ # Check arguments 1 which_imput_type <- NULL - if(!is.null(graphs) & is.null(dhists)){ - if ( !all(( unlist(sapply(X = graphs, FUN = igraph::is.igraph)) ) ) ) { + if (!is.null(graphs) & is.null(dhists)) { + if (!all((unlist(sapply(X = graphs, FUN = igraph::is.igraph))))) { stop("Graphs need to be igraph graph objects, or a list of dhists network features should be supplied.") } which_imput_type <- "Graphs" } - if (!is.null(dhists) ) { - if (all(( unlist(sapply(X = dhists, FUN = is.matrix)) ) ) ) { + if (!is.null(dhists)) { + if (all((unlist(sapply(X = dhists, FUN = is.matrix))))) { which_imput_type <- "Matrix" - } - if ( all(( unlist(sapply(X = dhists, FUN = - function(l){ all(( unlist(sapply(X = l, FUN = is_dhist)) ) ) } - )) ) ) ) { + } + if (all((unlist(sapply( + X = dhists, FUN = + function(l) { + all((unlist(sapply(X = l, FUN = is_dhist)))) + } + ))))) { which_imput_type <- "dhist" } - if(is.null(which_imput_type)){ + if (is.null(which_imput_type)) { warning("dhists does not conform to a 
Matrix or dhist class for all elmenents/netwroks in the list.") } } ## ------------------------------------------------------------------------ # Check arguments 2 # If dhists is a list of matrices of network features then transform them to dhist objects. - if(which_imput_type == "Matrix"){ - dhists <- sapply(X = dhists,FUN = graph_features_to_histograms, simplify = FALSE ) + if (which_imput_type == "Matrix") { + dhists <- sapply(X = dhists, FUN = graph_features_to_histograms, simplify = FALSE) } ## ------------------------------------------------------------------------ # Check arguments 3 - #If input is graph then get graphlet counts - if(which_imput_type == "Graphs"){ + # If input is graph then get graphlet counts + if (which_imput_type == "Graphs") { dhists <- parallel::mcmapply(gdd, graphs, - MoreArgs = - list( - feature_type = feature_type, - max_graphlet_size = max_graphlet_size, - ego_neighbourhood_size = ego_neighbourhood_size - ), - SIMPLIFY = FALSE, mc.cores = mc.cores + MoreArgs = + list( + feature_type = feature_type, + max_graphlet_size = max_graphlet_size, + ego_neighbourhood_size = ego_neighbourhood_size + ), + SIMPLIFY = FALSE, mc.cores = mc.cores ) } rm(graphs) ## ------------------------------------------------------------------------ # Check arguments 4 - #cross_comparison_spec was coded to require names! - if(is.null(names(dhists))){ - names(dhists) <- paste("Net",1:length(dhists),sep = "") + # cross_comparison_spec was coded to require names! 
+ if (is.null(names(dhists))) { + names(dhists) <- paste("Net", 1:length(dhists), sep = "") } ## ------------------------------------------------------------------------ comp_spec <- cross_comparison_spec(dhists) num_features <- length(dhists[[1]]) out <- purrr::simplify(parallel::mcmapply(function(index_a, index_b) { - netemd_one_to_one(dhists_1 = dhists[[index_a]], dhists_2 = dhists[[index_b]], - method = method, return_details = return_details, - smoothing_window_width = smoothing_window_width + netemd_one_to_one( + dhists_1 = dhists[[index_a]], dhists_2 = dhists[[index_b]], + method = method, return_details = return_details, + smoothing_window_width = smoothing_window_width ) }, comp_spec$index_a, comp_spec$index_b, SIMPLIFY = FALSE, mc.cores = mc.cores)) if (return_details) { @@ -306,16 +316,16 @@ netemd_many_to_many<- function(graphs=NULL,dhists=NULL, method = "optimise", smo #' @return A list with the following named elements #' \code{net_emd}: the NetEMD for the set of histogram pairs, \code{min_offsets}: the associated #' offsets giving the minimal EMD for each pair of histograms and \code{min_offset_std}: Offset used in the standardised histograms. 
-#' @examples -#' require(igraph) -#' goldstd_1 <- graph.lattice(c(8,8)) -#' goldstd_2 <- graph.lattice(c(44,44)) -#' props_1 <- count_orbits_per_node(graph = goldstd_1,max_graphlet_size = 5) -#' props_2 <- count_orbits_per_node(graph = goldstd_2,max_graphlet_size = 5) -#' dhists_1<- graph_features_to_histograms(props_1) -#' dhists_2<- graph_features_to_histograms(props_2) -#' # Obtain the minimum NetEMD_edges between the histograms -#' netemd_single_pair(dhists_1[[1]],dhists_2[[1]],method = "optimise",smoothing_window_width = 0) +#' @examples +#' require(igraph) +#' goldstd_1 <- graph.lattice(c(8, 8)) +#' goldstd_2 <- graph.lattice(c(44, 44)) +#' props_1 <- count_orbits_per_node(graph = goldstd_1, max_graphlet_size = 5) +#' props_2 <- count_orbits_per_node(graph = goldstd_2, max_graphlet_size = 5) +#' dhists_1 <- graph_features_to_histograms(props_1) +#' dhists_2 <- graph_features_to_histograms(props_2) +#' # Obtain the minimum NetEMD_edges between the histograms +#' netemd_single_pair(dhists_1[[1]], dhists_2[[1]], method = "optimise", smoothing_window_width = 0) #' @export netemd_single_pair <- function(dhist1, dhist2, method = "optimise", smoothing_window_width = 0) { @@ -335,14 +345,14 @@ netemd_single_pair <- function(dhist1, dhist2, method = "optimise", dhist1 <- as_smoothed_dhist(dhist1, smoothing_window_width) dhist2 <- as_smoothed_dhist(dhist2, smoothing_window_width) } - + # Store means and variances to calculate offset later mean1 <- dhist_mean_location(dhist1) mean2 <- dhist_mean_location(dhist2) - + var1 <- dhist_variance(dhist1) var2 <- dhist_variance(dhist2) - + # Mean centre histograms. This helps with numerical stability as, after # variance normalisation, the differences between locations are often small. # We want to avoid calculating small differences between large numbers as @@ -351,11 +361,11 @@ netemd_single_pair <- function(dhist1, dhist2, method = "optimise", # clustered around zero, rather than some potentially large mean location. 
dhist1 <- mean_centre_dhist(dhist1) dhist2 <- mean_centre_dhist(dhist2) - + # Normalise histogram to unit mass and unit variance dhist1_norm <- normalise_dhist_variance(normalise_dhist_mass(dhist1)) dhist2_norm <- normalise_dhist_variance(normalise_dhist_mass(dhist2)) - + # Calculate minimal EMD result <- min_emd(dhist1_norm, dhist2_norm, method = method) # As we mean-centred the histograms prior to passing to min_emd(), the offset diff --git a/R/orca_interface.R b/R/orca_interface.R index 130595a9..51614203 100644 --- a/R/orca_interface.R +++ b/R/orca_interface.R @@ -67,10 +67,10 @@ indexed_edges_to_graph <- function(indexed_edges) { #' previous alterations have been made #' @return A named list of simplified igraph graph object, with the name of each #' graph set to the name of the file it was read from. -#' @examples +#' @examples #' # Set source directory for Virus protein-protein interaction edge files stored in the netdist package. #' source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") -#' print(source_dir) +#' print(source_dir) #' # Load query graphs as igraph objects #' graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") #' graph_1 @@ -256,8 +256,7 @@ gdd <- function(graph, feature_type = "orbit", max_graphlet_size = 4, out <- count_graphlets_per_node(graph, max_graphlet_size = max_graphlet_size ) - } - else { + } else { stop("gdd: unrecognised feature_type") } graph_features_to_histograms(out) diff --git a/tests/testthat/test_emd.R b/tests/testthat/test_emd.R index 960a00c0..9db131ee 100644 --- a/tests/testthat/test_emd.R +++ b/tests/testthat/test_emd.R @@ -10,7 +10,7 @@ test_that("cost_matrix returns all zeros when all bin locations are identical", expect_equal(cost_matrix(bin_centres1, bin_centres2), expected) }) -test_that("cost_matrix returns zeros along diagonal when both sets of bin +test_that("cost_matrix returns zeros along diagonal when both sets of bin locations are the same", { bin_centres1 
<- c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5) bin_centres2 <- bin_centres1 @@ -18,8 +18,8 @@ test_that("cost_matrix returns zeros along diagonal when both sets of bin expect_equal(diag(cost_matrix(bin_centres1, bin_centres2)), expected) }) -test_that("cost_matrix returns zeros along diagonal and taxicab distance from - all zeros for all other elements when both sets of bin locations are +test_that("cost_matrix returns zeros along diagonal and taxicab distance from + all zeros for all other elements when both sets of bin locations are the same and are a sequence of consecutive integers", { bin_centres1 <- c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5) bin_centres2 <- bin_centres1 @@ -28,7 +28,7 @@ test_that("cost_matrix returns zeros along diagonal and taxicab distance from expect_equal(cost_matrix(bin_centres1, bin_centres2), expected) }) -test_that("cost_matrix is correct size when the two histograms are of different +test_that("cost_matrix is correct size when the two histograms are of different lengths", { bin_centres1 <- c(1, 2, 3, 4, 5, 6, 7) bin_centres2 <- c(8, 9, 10) @@ -41,7 +41,7 @@ test_that("cost_matrix is correct size when the two histograms are of different context("EMD: EMD") # EMD: Property-based tests -test_that("EMD methods return 0 when comparing a 1D feature distribution to +test_that("EMD methods return 0 when comparing a 1D feature distribution to itself", { bin_masses1 <- c(0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0) bin_masses2 <- bin_masses1 @@ -59,7 +59,7 @@ test_that("EMD methods return 0 when comparing a 1D feature distribution to expect_equal(emd(histogram1, histogram2), expected) }) -test_that("EMD methods return numBins/2 when offsetting a symmetric discrete +test_that("EMD methods return numBins/2 when offsetting a symmetric discrete triangle distribution by 1", { cost_fn <- function(triangle_width) { move_dist <- ceiling((triangle_width + 1) / 2) @@ -191,7 +191,7 @@ test_that("EMD methods return numBins/2 when offsetting a symmetric discrete 
expect_equal(emd(histogram1, histogram2), expected) }) -test_that("EMD methods return same result for densely and sparsely specified +test_that("EMD methods return same result for densely and sparsely specified bins", { sparse_bin_masses1 <- c(1, 1, 1, 1, 1, 1) sparse_bin_masses2 <- c(1, 1, 1, 1, 1, 1) @@ -242,7 +242,7 @@ test_that("EMD methods return same result for densely and sparsely specified ) }) -test_that("EMD methods return same result when order of densely specified bins +test_that("EMD methods return same result when order of densely specified bins is changed", { bin_masses1 <- c(1, 1, 1, 1, 0, 0, 0, 0, 0) bin_masses2 <- c(0, 0, 0, 0, 0, 1, 1, 1, 1) @@ -284,7 +284,7 @@ test_that("EMD methods return same result when order of densely specified bins ) }) -test_that("EMD methods return same result when order of sparsely specified bins +test_that("EMD methods return same result when order of sparsely specified bins is changed", { bin_masses1 <- c(1, 1, 1, 1, 1, 1) bin_masses2 <- c(1, 1, 1, 1, 1, 1) @@ -392,7 +392,7 @@ test_that("min_emd_ methods correctly compare a non-offset 1D feature expect_equal(actual_optimise, expected) }) -test_that("min_emd_ methods correctly compare an offset 1D feature +test_that("min_emd_ methods correctly compare an offset 1D feature distribution to itself", { offset <- 10 bin_masses1 <- c(0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0) diff --git a/tests/testthat/test_measures_net_emd.R b/tests/testthat/test_measures_net_emd.R index 6a99c71b..d7aeb68c 100644 --- a/tests/testthat/test_measures_net_emd.R +++ b/tests/testthat/test_measures_net_emd.R @@ -22,8 +22,9 @@ expect_equal(self_net_emd(histogram, shift = 0, "optimise"), expected) expect_equal(self_net_emd(histogram, shift = 0, "exhaustive"), expected) expect_self_netemd_correct <- function(histogram, shift, method, - return_details = FALSE) { - self_net_emd <- netemd_one_to_one(dhists_1 = histogram, dhists_2 = shift_dhist(histogram, shift), + return_details = FALSE) { + self_net_emd <- 
netemd_one_to_one( + dhists_1 = histogram, dhists_2 = shift_dhist(histogram, shift), method = method, return_details = return_details ) loc <- histogram$locations @@ -199,8 +200,7 @@ test_that("net_emd returns min_emd = 0 and min_offset = 0 when comparing any expect_self_netemd_correct <- function(histogram, shift, method, return_details = FALSE) { - self_net_emd <- netemd_one_to_one(dhists_1 = histogram, dhists_2 = shift_dhist(histogram, shift),method = method, return_details = return_details - ) + self_net_emd <- netemd_one_to_one(dhists_1 = histogram, dhists_2 = shift_dhist(histogram, shift), method = method, return_details = return_details) loc <- histogram$locations mass <- histogram$masses var <- sum(loc * loc * mass) / sum(mass) - (sum(loc * mass) / sum(mass))^2 @@ -292,7 +292,8 @@ test_that("net_emd return 0 when comparing graphlet orbit degree distributions # Map over virus PPI networks purrr::walk(virus_gdd, function(gdd) { purrr::walk(gdd, function(gdd_Ox) { - expect_equalish(netemd_one_to_one(dhists_1 = gdd_Ox, dhists_2 = gdd_Ox, + expect_equalish(netemd_one_to_one( + dhists_1 = gdd_Ox, dhists_2 = gdd_Ox, method = "optimise", smoothing_window_width = 0 ), 0) @@ -346,7 +347,8 @@ test_that("net_emd return 0 when comparing graphlet orbit degree distributions # Map over random graphs purrr::walk(random_gdd, function(gdd) { purrr::walk(gdd, function(gdd_Ox) { - expect_equalish(netemd_one_to_one(dhists_1 = gdd_Ox, dhists_2 = gdd_Ox, + expect_equalish(netemd_one_to_one( + dhists_1 = gdd_Ox, dhists_2 = gdd_Ox, method = "optimise", smoothing_window_width = 0 ), 0) @@ -403,11 +405,11 @@ test_that("netemd_many_to_many works", { expected_netemd_fn <- function(gdds) { list( netemds = c( - netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$ECL), netemd_one_to_one(dhists_1 =gdds$EBV, dhists_2 = gdds$HSV), - netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$KSHV), netemd_one_to_one(dhists_1 =gdds$EBV, dhists_2 = gdds$VZV), - netemd_one_to_one(dhists_1 = 
gdds$ECL, dhists_2 = gdds$HSV), netemd_one_to_one(dhists_1 =gdds$ECL, dhists_2 = gdds$KSHV), - netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$VZV), netemd_one_to_one(dhists_1 =gdds$HSV, dhists_2 = gdds$KSHV), - netemd_one_to_one(dhists_1 = gdds$HSV, dhists_2 = gdds$VZV), netemd_one_to_one(dhists_1 =gdds$KSHV, dhists_2 = gdds$VZV) + netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$ECL), netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$HSV), + netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$KSHV), netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$VZV), + netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$HSV), netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$KSHV), + netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$VZV), netemd_one_to_one(dhists_1 = gdds$HSV, dhists_2 = gdds$KSHV), + netemd_one_to_one(dhists_1 = gdds$HSV, dhists_2 = gdds$VZV), netemd_one_to_one(dhists_1 = gdds$KSHV, dhists_2 = gdds$VZV) ), comp_spec = cross_comparison_spec(gdds) ) @@ -415,7 +417,7 @@ test_that("netemd_many_to_many works", { # Comparison function for clarity compare_fn <- function(gdds) { - expect_equal(netemd_many_to_many(dhists=gdds), expected_netemd_fn(gdds)) + expect_equal(netemd_many_to_many(dhists = gdds), expected_netemd_fn(gdds)) } # Map over test parameters, comparing actual gdds to expected From 1082d295b33fb91db46160455e05fd124922754b Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Mon, 6 Jun 2022 20:11:47 +0000 Subject: [PATCH 57/84] Update documentation --- man/netdis_many_to_many.Rd | 4 +-- man/netdis_one_to_one.Rd | 62 +++++++++++++++++----------------- man/netemd_one_to_one.Rd | 68 +++++++++++++++++++------------------- man/netemd_single_pair.Rd | 18 +++++----- man/read_simple_graphs.Rd | 2 +- 5 files changed, 77 insertions(+), 77 deletions(-) diff --git a/man/netdis_many_to_many.Rd b/man/netdis_many_to_many.Rd index ef60e0f6..12f5c057 100644 --- a/man/netdis_many_to_many.Rd +++ b/man/netdis_many_to_many.Rd @@ -75,12 
+75,12 @@ ego-network. As well as graphlet counts, each matrix must contain an additional column labelled "N" including the node count for each ego network.} -\item{graphlet_counts_ref}{Pre-generated reference graphlet counts (default: NULL). Matrix containing counts +\item{graphlet_counts_ref}{Pre-generated reference graphlet counts (default: NULL). Matrix containing counts of each graphlet (columns) for each ego-network (rows) in the input graph. Columns are labelled with graphlet IDs and rows are labelled with the ID of the central node in each ego-network. As well as graphlet counts, each matrix must contain an additional column labelled "N" including the node count for -each ego network. +each ego network. If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not be used.} } diff --git a/man/netdis_one_to_one.Rd b/man/netdis_one_to_one.Rd index 28b8a259..8f3b5208 100644 --- a/man/netdis_one_to_one.Rd +++ b/man/netdis_one_to_one.Rd @@ -103,41 +103,41 @@ Calculates the different variants of the network dissimilarity statistic Netdis \examples{ require(netdist) require(igraph) -#Set source directory for Virus PPI graph edge files stored in the netdist package. +# Set source directory for Virus PPI graph edge files stored in the netdist package. source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects -graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"),format = "ncol") -graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"),format = "ncol") +graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") +graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") -#Netdis variant using the Geometric Poisson approximation to remove the background expectation of each network. 
-netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = NULL) #This option will focus on detecting more general and global discrepancies between the ego-network structures. +# Netdis variant using the Geometric Poisson approximation to remove the background expectation of each network. +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) # This option will focus on detecting more general and global discrepancies between the ego-network structures. -#Comparing the networks via their observed ego counts without centering them (equivalent to using expectation equal to zero). This option, will focus on detecting small discrepancies. -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = 0) +# Comparing the networks via their observed ego counts without centering them (equivalent to using expectation equal to zero). This option, will focus on detecting small discrepancies. +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = 0) # Example of the use of netdis with a reference graph.This option will focus on detecting discrepancies between the networks relative to the ego-network structure of the reference network / gold-standard. -# Two lattice networks of different sizes are used for this example. - goldstd_1 <- graph.lattice(c(8,8)) #A reference net - goldstd_2 <- graph.lattice(c(44,44)) #A reference net - - netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_1) - netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_2) - - - #Providing pre-calculated subgraph counts. - - props_1 <- count_graphlets_ego(graph = graph_1) - props_2 <- count_graphlets_ego(graph = graph_2) - props_goldstd_1 <- count_graphlets_ego(graph = goldstd_1) - props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) - -#Netdis Geometric-Poisson. -netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, ref_graph = NULL) - -#Netdis Zero Expectation. 
-netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, ref_graph = 0) - -#Netdis using gold-standard network -netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, graphlet_counts_ref = props_goldstd_1) -netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, graphlet_counts_ref = props_goldstd_2) +# Two lattice networks of different sizes are used for this example. +goldstd_1 <- graph.lattice(c(8, 8)) # A reference net +goldstd_2 <- graph.lattice(c(44, 44)) # A reference net + +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_1) +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_2) + + +# Providing pre-calculated subgraph counts. + +props_1 <- count_graphlets_ego(graph = graph_1) +props_2 <- count_graphlets_ego(graph = graph_2) +props_goldstd_1 <- count_graphlets_ego(graph = goldstd_1) +props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) + +# Netdis Geometric-Poisson. +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = NULL) + +# Netdis Zero Expectation. +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = 0) + +# Netdis using gold-standard network +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_1) +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_2) } diff --git a/man/netemd_one_to_one.Rd b/man/netemd_one_to_one.Rd index 59f4b04e..caebd1e7 100644 --- a/man/netemd_one_to_one.Rd +++ b/man/netemd_one_to_one.Rd @@ -62,7 +62,7 @@ the minimal EMD for each pair of histograms, \code{min_offsets}: the associated offsets giving the minimal EMD for each pair of histograms } \description{ -Calculates the network Earth Mover's Distance (EMD) between +Calculates the network Earth Mover's Distance (EMD) between two sets of network features. 
This is done by individually normalising the distribution of each feature so that they have unit mass and unit variance. Then the minimun EMD between the same pair of features (one for each corresponding graph) is calculated by considering all possible translations of the feature distributions. Finally the average over all features is reported. This is calculated as follows: @@ -71,37 +71,37 @@ This is calculated as follows: 3. Take the average minimum EMD across all features. } \examples{ - require(igraph) - graph_1 <- graph.lattice(c(8,8)) - graph_2 <- graph.lattice(c(44,44)) - netemd_one_to_one(graph_1=graph_1,graph_2=graph_2,feature_type="orbit",max_graphlet_size=5) - - #Providing a matrix of network features - props_a= count_orbits_per_node(graph = graph_1,max_graphlet_size = 5) - props_b= count_orbits_per_node(graph = graph_2,max_graphlet_size = 5) - - netemd_one_to_one(dhists_1=props_a, dhists_2=props_b,smoothing_window_width = 1) - - #Providing the network features as lists of dhist objects - dhists_1<- graph_features_to_histograms(props_a) - dhists_2<- graph_features_to_histograms(props_b) - - netemd_one_to_one(dhists_1=dhists_1, dhists_2=dhists_2) - - - # A variation of NetEmd: Using the Laplacian spectrum - #Laplacian - Lapg_1 <- igraph::laplacian_matrix(graph = graph_1,normalized = FALSE,sparse = FALSE) - Lapg_2 <- igraph::laplacian_matrix(graph = graph_2,normalized = FALSE,sparse = FALSE) - - #Normalized Laplacian - NLapg_1 <- igraph::laplacian_matrix(graph = graph_1,normalized = TRUE,sparse = FALSE) - NLapg_2 <- igraph::laplacian_matrix(graph = graph_2,normalized = TRUE,sparse = FALSE) - - #Spectra (This may take a couple of minutes). 
- props_1 <- cbind(L.Spectra= eigen(Lapg_1)$values, NL.Spectra= eigen(NLapg_1)$values) - props_2 <- cbind(L.Spectra= eigen(Lapg_2)$values, NL.Spectra= eigen(NLapg_2)$values) - - netemd_one_to_one(dhists_1 = props_1,dhists_2 = props_2,smoothing_window_width = 0)#Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. - +require(igraph) +graph_1 <- graph.lattice(c(8, 8)) +graph_2 <- graph.lattice(c(44, 44)) +netemd_one_to_one(graph_1 = graph_1, graph_2 = graph_2, feature_type = "orbit", max_graphlet_size = 5) + +# Providing a matrix of network features +props_a <- count_orbits_per_node(graph = graph_1, max_graphlet_size = 5) +props_b <- count_orbits_per_node(graph = graph_2, max_graphlet_size = 5) + +netemd_one_to_one(dhists_1 = props_a, dhists_2 = props_b, smoothing_window_width = 1) + +# Providing the network features as lists of dhist objects +dhists_1 <- graph_features_to_histograms(props_a) +dhists_2 <- graph_features_to_histograms(props_b) + +netemd_one_to_one(dhists_1 = dhists_1, dhists_2 = dhists_2) + + +# A variation of NetEmd: Using the Laplacian spectrum +# Laplacian +Lapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = FALSE, sparse = FALSE) +Lapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = FALSE, sparse = FALSE) + +# Normalized Laplacian +NLapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = TRUE, sparse = FALSE) +NLapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = TRUE, sparse = FALSE) + +# Spectra (This may take a couple of minutes). +props_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) +props_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) + +netemd_one_to_one(dhists_1 = props_1, dhists_2 = props_2, smoothing_window_width = 0) # Use of smoothing window 1 is given for discrete integer distributions. 
If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. + } diff --git a/man/netemd_single_pair.Rd b/man/netemd_single_pair.Rd index 1158d2f5..5cf3b3ed 100644 --- a/man/netemd_single_pair.Rd +++ b/man/netemd_single_pair.Rd @@ -43,13 +43,13 @@ This is calculated as follows: 2. Find the minimum EMD between the histograms } \examples{ - require(igraph) - goldstd_1 <- graph.lattice(c(8,8)) - goldstd_2 <- graph.lattice(c(44,44)) - props_1 <- count_orbits_per_node(graph = goldstd_1,max_graphlet_size = 5) - props_2 <- count_orbits_per_node(graph = goldstd_2,max_graphlet_size = 5) - dhists_1<- graph_features_to_histograms(props_1) - dhists_2<- graph_features_to_histograms(props_2) - # Obtain the minimum NetEMD_edges between the histograms - netemd_single_pair(dhists_1[[1]],dhists_2[[1]],method = "optimise",smoothing_window_width = 0) +require(igraph) +goldstd_1 <- graph.lattice(c(8, 8)) +goldstd_2 <- graph.lattice(c(44, 44)) +props_1 <- count_orbits_per_node(graph = goldstd_1, max_graphlet_size = 5) +props_2 <- count_orbits_per_node(graph = goldstd_2, max_graphlet_size = 5) +dhists_1 <- graph_features_to_histograms(props_1) +dhists_2 <- graph_features_to_histograms(props_2) +# Obtain the minimum NetEMD_edges between the histograms +netemd_single_pair(dhists_1[[1]], dhists_2[[1]], method = "optimise", smoothing_window_width = 0) } diff --git a/man/read_simple_graphs.Rd b/man/read_simple_graphs.Rd index 57d45b86..95416894 100644 --- a/man/read_simple_graphs.Rd +++ b/man/read_simple_graphs.Rd @@ -52,7 +52,7 @@ following order: \examples{ # Set source directory for Virus protein-protein interaction edge files stored in the netdist package. 
source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") -print(source_dir) +print(source_dir) # Load query graphs as igraph objects graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") graph_1 From f44076ca751ac3806f764a33db77fda59c5ca81c Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Mon, 6 Jun 2022 21:35:15 +0100 Subject: [PATCH 58/84] move quality checks into separate workflow --- .Rbuildignore | 1 + .github/workflows/build.yml | 25 +-------------------- .github/workflows/quality.yaml | 40 ++++++++++++++++++++++++++++++++++ .lintr | 1 + 4 files changed, 43 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/quality.yaml create mode 100644 .lintr diff --git a/.Rbuildignore b/.Rbuildignore index aa09cc1d..6555a0db 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,4 @@ ^appveyor\.yml$ ^doc$ ^Meta$ +^\.github$ diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e38577d3..160d59eb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,7 +20,7 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-latest, r: 'release'} # run coverage, linting, and update docs on this platform only + - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel'} - {os: ubuntu-latest, r: 'devel'} - {os: macOS-latest, r: 'release'} @@ -51,14 +51,6 @@ jobs: extra-packages: | any::devtools - - uses: r-lib/actions/setup-r-dependencies@v2 - if: ${{ (matrix.config.os == 'ubuntu-latest') && (matrix.config.r == 'release') }} - with: - extra-packages: | - any::covr - any::lintr - any::styler - - name: Document run: devtools::document() shell: Rscript {0} @@ -73,21 +65,6 @@ jobs: git pull --ff-only git push origin - - name: Check style with styler - if: ${{ (matrix.config.os == 'ubuntu-latest') && (matrix.config.r == 'release') }} - run: styler::style_pkg(dry='fail') - shell: Rscript {0} - - - name: Check style with lintr - if: ${{ (matrix.config.os == 
'ubuntu-latest') && (matrix.config.r == 'release') }} - run: lintr::lint_package() - shell: Rscript {0} - - name: Check package with devtools run: devtools::check() shell: Rscript {0} - - - name: Check coverage - if: ${{ (matrix.config.os == 'ubuntu-latest') && (matrix.config.r == 'release') }} - run: covr::codecov(quiet = FALSE) - shell: Rscript {0} diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml new file mode 100644 index 00000000..7a53a048 --- /dev/null +++ b/.github/workflows/quality.yaml @@ -0,0 +1,40 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: Quality checks + +jobs: + lint-project: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::covr + any::lintr + any::styler + + - name: Styler + run: styler::style_pkg(dry='fail') + shell: Rscript {0} + + - name: Lintr + run: lintr::lint_package() + shell: Rscript {0} + + - name: Coverage + run: covr::codecov(quiet = FALSE) + shell: Rscript {0} diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..f2c1b9b3 --- /dev/null +++ b/.lintr @@ -0,0 +1 @@ +error_on_lint: TRUE From caff555a6043958e093288ffe14b50d29e461a00 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Mon, 6 Jun 2022 21:47:35 +0100 Subject: [PATCH 59/84] quality checks badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 380c78b6..f2ecc2bf 100755 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ Until this package hits release 1.0 anything can change with no notice. 
[![GitHub release](https://img.shields.io/github/release/alan-turing-institute/network-comparison.svg)](https://github.com/alan-turing-institute/network-comparison/releases/latest) [![Build](https://github.com/alan-turing-institute/network-comparison/actions/workflows/main.yml/badge.svg)](https://github.com/alan-turing-institute/network-comparison/actions/workflows/main.yml) +[![Quality checks](https://github.com/alan-turing-institute/network-comparison/actions/workflows/quality.yaml/badge.svg)](https://github.com/alan-turing-institute/network-comparison/actions/workflows/quality.yaml) [![Codecov](https://img.shields.io/codecov/c/github/alan-turing-institute/network-comparison/master.svg)](https://codecov.io/gh/alan-turing-institute/network-comparison?branch=master) [![license](https://img.shields.io/github/license/alan-turing-institute/network-comparison.svg)](https://github.com/alan-turing-institute/network-comparison/edit/master/LICENSE) [![Github All Releases](https://img.shields.io/github/downloads/alan-turing-institute/network-comparison/total.svg)](https://github.com/alan-turing-institute/network-comparison/releases/latest) From 7123f0330367a824b3e7f19f4717458415890b28 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Mon, 6 Jun 2022 22:29:16 +0100 Subject: [PATCH 60/84] start fixing line lengths to 80 chars --- R/dhist.R | 82 ++++++++----- R/measures_net_dis.R | 276 ++++++++++++++++++++++++++++--------------- 2 files changed, 234 insertions(+), 124 deletions(-) diff --git a/R/dhist.R b/R/dhist.R index 862afccd..4ea7a237 100644 --- a/R/dhist.R +++ b/R/dhist.R @@ -9,23 +9,27 @@ #' location #' @param smoothing_window_width If greater than 0, the discrete histogram will #' be treated as having the mass at each location "smoothed" uniformly across -#' a bin centred on the location and having width = \code{smoothing_window_width} -#' (default = \code{0} - no smoothing) +#' a bin centred on the location and having +#' width = \code{smoothing_window_width} 
(default = \code{0} - no smoothing) #' @param sorted Whether or not to return a discrete histogram with locations #' and masses sorted by ascending mass (default = \code{TRUE}) #' @return A sparse discrete histogram. Format is a \code{dhist} object, which #' is a list of class \code{dhist} with the following named elements: #' \itemize{ #' \item \code{locations}: A 1D numeric vector of discrete locations -#' \item \code{masses}: A 1D numeric vector of the mass present at each location +#' \item \code{masses}: A 1D numeric vector of the mass present at each +#' location #' } #' Note that locations where no mass is present are not included in the returned #' \code{dhist} object. Mass in these discrete histograms is treated as being -#' present precisely at the specified location. Discrete histograms should not be used -#' for data where observations have been grouped into bins representing ranges -#' of observation values. +#' present precisely at the specified location. Discrete histograms should not +#' be used for data where observations have been grouped into bins representing +#' ranges of observation values. #' @export -dhist <- function(locations, masses, smoothing_window_width = 0, sorted = TRUE) { +dhist <- function(locations, + masses, + smoothing_window_width = 0, + sorted = TRUE) { if (!is_numeric_vector_1d(locations)) { stop("Bin locations must be provided as a 1D numeric vector") } @@ -76,9 +80,11 @@ update_dhist <- #' @param dhist A discrete histogram as a \code{dhist} object #' @param smoothing_window_width If greater than 0, the discrete histogram will #' be treated as having the mass at each location "smoothed" uniformly across -#' a bin centred on the location and having width = \code{smoothing_window_width} -#' @return A copy of a \code{dhist} object with its \code{smoothing_window_width} -#' attribute set to the value provided \code{smoothing_window_width} parameter. 
+#' a bin centred on the location and having +#' width = \code{smoothing_window_width} +#' @return A copy of a \code{dhist} object with its +#' \code{smoothing_window_width} attribute set to the value provided +#' \code{smoothing_window_width} parameter. #' @export as_smoothed_dhist <- function(dhist, smoothing_window_width) { dhist <- update_dhist(dhist, smoothing_window_width = smoothing_window_width) @@ -90,8 +96,8 @@ as_smoothed_dhist <- function(dhist, smoothing_window_width) { #' Returns an "unsmoothed" copy of a \code{dhist} object with its #' \code{smoothing_window_width} attribute set to 0. #' @param dhist A discrete histogram as a \code{dhist} object -#' @return A copy of a \code{dhist} object with its \code{smoothing_window_width} -#' attribute set to 0. +#' @return A copy of a \code{dhist} object with its +#' \code{smoothing_window_width} attribute set to 0. #' @export as_unsmoothed_dhist <- function(dhist) { dhist <- update_dhist(dhist, smoothing_window_width = 0) @@ -101,8 +107,8 @@ as_unsmoothed_dhist <- function(dhist) { #' Check if an object is a \code{dhist} discrete histogram #' #' Checks if the input object is of class \code{dhist}. If \code{fast_check} is -#' \code{TRUE} then the only check is whether the object has a class attribute of -#' \code{dhist}. If \code{fast_check} is \code{FALSE} (default), then checks +#' \code{TRUE} then the only check is whether the object has a class attribute +#' of \code{dhist}. If \code{fast_check} is \code{FALSE} (default), then checks #' are also made to ensure that the object has the structure required of a #' \code{dhist} object. 
#' @param x An arbitrary object @@ -111,8 +117,8 @@ as_unsmoothed_dhist <- function(dhist) { #' is set to \code{dhist} (default = \code{TRUE}) #' @export is_dhist <- function(x, fast_check = TRUE) { - # Quick check that relies on user not to construct variables with "dhist" class - # that do not have the required elements + # Quick check that relies on user not to construct variables with "dhist" + # class that do not have the required elements has_class_attr <- (class(x) == "dhist") if (fast_check) { # Early return if fast check requested @@ -133,13 +139,15 @@ is_dhist <- function(x, fast_check = TRUE) { #' Discrete histogram from observations (Pure R slow version) #' -#' Generate a sparse discrete histogram from a set of discrete numeric observations +#' Generate a sparse discrete histogram from a set of discrete numeric +#' observations #' @param observations A vector of discrete numeric observations #' @return A sparse discrete histogram. Format is a \code{dhist} object, which #' is a list of class \code{dhist} with the following named elements: #' \itemize{ #' \item \code{locations}: A 1D numeric vector of discrete locations -#' \item \code{masses}: A 1D numeric vector of the mass present at each location +#' \item \code{masses}: A 1D numeric vector of the mass present at each +#' location #' } #' @export dhist_from_obs_slow <- function(observations) { @@ -162,13 +170,15 @@ dhist_from_obs_slow <- function(observations) { #' Discrete histogram from observations #' -#' Generate a sparse discrete histogram from a set of discrete numeric observations +#' Generate a sparse discrete histogram from a set of discrete numeric +#' observations #' @param observations A vector of discrete numeric observations #' @return A sparse discrete histogram. 
Format is a \code{dhist} object, which #' is a list of class \code{dhist} with the following named elements: #' \itemize{ #' \item \code{locations}: A 1D numeric vector of discrete locations -#' \item \code{masses}: A 1D numeric vector of the mass present at each location +#' \item \code{masses}: A 1D numeric vector of the mass present at each +#' location #' } #' @export dhist_from_obs <- function(observations) { @@ -299,7 +309,8 @@ area_between_dhist_ecmfs <- function(dhist_ecmf1, dhist_ecmf2) { stop("ECMFs must have the same type") } ecmf_type <- ecmf_type1 - # Determine all possible locations where either ECMF changes gradient ("knots") + # Determine all possible locations where either ECMF changes gradient + # ("knots") x1 <- ecmf_knots(dhist_ecmf1) x2 <- ecmf_knots(dhist_ecmf2) x <- sort(union(x1, x2)) @@ -413,7 +424,11 @@ area_between_offset_ecmfs <- function(ecmf1, ecmf2, offset) { #' increasing (default) or decreasing order of location #' @export sort_dhist <- function(dhist, decreasing = FALSE) { - sorted_indexes <- sort(dhist$locations, decreasing = decreasing, index.return = TRUE)$ix + sorted_indexes <- sort( + dhist$locations, + decreasing = decreasing, + index.return = TRUE + )$ix dhist$masses <- dhist$masses[sorted_indexes] dhist$locations <- dhist$locations[sorted_indexes] return(dhist) @@ -463,12 +478,14 @@ dhist_variance <- function(dhist) { # For unsmoothed discrete histograms, the mass associated with each location # is located precisely at the lcoation. Therefore cariance (i.e. E[X^2]) # is the mass-weighted sum of the mean-centred locations - variance <- sum(dhist$masses * (mean_centred_locations)^2) / sum(dhist$masses) + variance <- sum(dhist$masses * (mean_centred_locations)^2) / + sum(dhist$masses) } else { - # For smoothed histograms, the mass associated with each location is "smoothed" - # uniformly across a bin centred on the location with width = smoothing_window_width - # Variance (i.e. 
E[X^2]) is therefore the mass-weighted sum of the integrals - # of x^2 over the mean-centred bins at each location. + # For smoothed histograms, the mass associated with each location is + # "smoothed" uniformly across a bin centred on the location with + # width = smoothing_window_width Variance (i.e. E[X^2]) is therefore the + # mass-weighted sum of the integrals of x^2 over the mean-centred bins at + # each location. hw <- dhist$smoothing_window_width / 2 bin_lowers <- mean_centred_locations - hw bin_uppers <- mean_centred_locations + hw @@ -540,13 +557,17 @@ normalise_dhist_variance <- function(dhist) { std_dev <- dhist_std(dhist) centred_locations <- (dhist$locations - dhist_mean_location(dhist)) normalised_centred_locations <- centred_locations / std_dev - normalised_locations <- normalised_centred_locations + dhist_mean_location(dhist) + normalised_locations <- normalised_centred_locations + + dhist_mean_location(dhist) dhist <- update_dhist(dhist, locations = normalised_locations) # If smoothing_window_width not zero, then update it to reflect the variance # normalisation if (dhist$smoothing_window_width != 0) { normalised_smoothing_window_width <- dhist$smoothing_window_width / std_dev - dhist <- update_dhist(dhist, smoothing_window_width = normalised_smoothing_window_width) + dhist <- update_dhist( + dhist, + smoothing_window_width = normalised_smoothing_window_width + ) } } return(dhist) @@ -584,7 +605,8 @@ harmonise_dhist_locations <- function(dhist1, dhist2) { #' #' Check if a variable is a 1D numeric vector by checking that: #' \itemize{ -#' \item \code{is_numeric(input)}: Input is vector, matrix, array or list of numbers +#' \item \code{is_numeric(input)}: Input is vector, matrix, array or list of +#' numbers #' \item \code{is_null(dim(input))}: Input is not a matrix or array #' } #' @param input Arbitrary object diff --git a/R/measures_net_dis.R b/R/measures_net_dis.R index 2b0bbc21..0ae66fd3 100644 --- a/R/measures_net_dis.R +++ 
b/R/measures_net_dis.R @@ -1,11 +1,23 @@ #' Netdis between two graphs #' -#' Calculates the different variants of the network dissimilarity statistic Netdis between two graphs. The variants currently supported are Netdis using a gold-standard network, Netdis using no expecations (\code{ref_graph = 0}), and Netdis using a Geometric Poisson approximation for the expectation (\code{ref_graph = NULL}). +#' Calculates the different variants of the network dissimilarity statistic +#' Netdis between two graphs. The variants currently supported are Netdis using +#' a gold-standard network, Netdis using no expecations (\code{ref_graph = 0}), +#' and Netdis using a Geometric Poisson approximation for the expectation +#' (\code{ref_graph = NULL}). #' #' -#' @param graph_1 A simple graph object from the \code{igraph} package. \code{graph_1} can be set to \code{NULL} (default) if \code{graphlet_counts_1} is provided. If both \code{graph_1} and \code{graphlet_counts_1} are not \code{NULL}, then only \code{graphlet_counts_1} will be considered. +#' @param graph_1 A simple graph object from the \code{igraph} package. +#' \code{graph_1} can be set to \code{NULL} (default) if +#' \code{graphlet_counts_1} is provided. If both \code{graph_1} and +#' \code{graphlet_counts_1} are not \code{NULL}, then only +#' \code{graphlet_counts_1} will be considered. #' -#' @param graph_2 A simple graph object from the \code{igraph} package. \code{graph_2} can be set to \code{NULL} (default) if \code{graphlet_counts_2} is provided. If both \code{graph_2} and \code{graphlet_counts_2} are not \code{NULL}, then only \code{graphlet_counts_2} will be considered. +#' @param graph_2 A simple graph object from the \code{igraph} package. +#' \code{graph_2} can be set to \code{NULL} (default) if +#' \code{graphlet_counts_2} is provided. If both \code{graph_2} and +#' \code{graphlet_counts_2} are not \code{NULL}, then only +#' \code{graphlet_counts_2} will be considered. 
#' #' @param graphlet_counts_1 Pre-generated graphlet counts for the first query #' graph. Matrix containing counts of each graphlet (columns) for @@ -13,9 +25,9 @@ #' graphlet IDs and rows are labelled with the ID of the central node in each #' ego-network. As well as graphlet counts, each matrix must contain an #' additional column labelled "N" including the node count for -#' each ego network. (default: NULL). -#' If the \code{graphlet_counts_1} argument is defined then -#' \code{graph_1} will not be used. These counts can be obtained with \code{count_graphlets_ego}. +#' each ego network. If the \code{graphlet_counts_1} argument is defined then +#' \code{graph_1} will not be used. These counts can be obtained with +#' \code{count_graphlets_ego}. (default: NULL). #' #' #' @param graphlet_counts_2 Pre-generated graphlet counts for the second query @@ -26,15 +38,17 @@ #' additional column labelled "N" including the node count for #' each ego network. (default: NULL). #' If the \code{graphlet_counts_2} argument is defined then -#' \code{graph_2} will not be used. These counts can be obtained with \code{count_graphlets_ego}. +#' \code{graph_2} will not be used. These counts can be obtained with +#' \code{count_graphlets_ego}. #' #' @param ref_graph Controls how expected counts are calculated. Either: #' 1) A numeric value - used as a constant expected counts value for all query #' graphs . #' 2) A simplified \code{igraph} object - used as a reference graph from which #' expected counts are calculated for all query graphs. -#' 3) NULL (Default) - Used for Netdis-GP, where the expected counts will be calculated based on the properties of the -#' query graphs themselves. (Geometric-Poisson approximation). +#' 3) NULL (Default) - Used for Netdis-GP, where the expected counts will be +#' calculated based on the properties of the query graphs themselves +#' (Geometric-Poisson approximation). #' #' @param graphlet_counts_ref Pre-generated reference graphlet counts. 
#' Matrix containing counts of each graphlet (columns) for @@ -42,11 +56,11 @@ #' graphlet IDs and rows are labelled with the ID of the central node in each #' ego-network. As well as graphlet counts, each matrix must contain an #' additional column labelled "N" including the node count for -#' each ego network. (default: NULL). -#' If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not -#' be used. +#' each ego network. If the \code{graphlet_counts_ref} argument is defined then +#' \code{ref_graph} will not be used. (default: NULL). #' -#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 (default) and 5 are supported. +#' @param max_graphlet_size Generate graphlets up to this size. Currently only +#' 4 (default) and 5 are supported. #' #' @param neighbourhood_size Ego network neighborhood size (default: 2). #' @@ -56,25 +70,31 @@ #' @param min_ego_edges Filter ego networks which have fewer #' than min_ego_edges edges (default: 1). #' -#' @param binning_fn Function used to bin ego network densities. Takes edge \code{densities} -#' as its single argument, and returns a named list including, the input \code{densities}, the resulting bin \code{breaks} (vector of density bin limits), and the vector \code{interval_indexes} which states to what bin each of the individual elements in \code{densities} belongs to. -#' ego network). If \code{NULL}, then the method \code{binned_densities_adaptive} with -#' \code{min_counts_per_interval = 5} and \code{num_intervals = 100} is used -#' (Default: NULL). +#' @param binning_fn Function used to bin ego network densities. Takes edge +#' \code{densities} as its single argument, and returns a named list including, +#' the input \code{densities}, the resulting bin \code{breaks} (vector of +#' density bin limits), and the vector \code{interval_indexes} which states to +#' what bin each of the individual elements in \code{densities} belongs to. +#' ego network). 
If \code{NULL}, then the method +#' \code{binned_densities_adaptive} with \code{min_counts_per_interval = 5} +#' and \code{num_intervals = 100} is used (Default: NULL). #' #' @param bin_counts_fn Function used to calculate expected graphlet counts in #' each density bin. Takes \code{graphlet_counts}, \code{interval_indexes} -#' (bin indexes) and \code{max_graphlet_size} as arguments. If \code{bin_counts_fn} is \code{NULL}, (default), it will apply -#' either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -#' values of \code{ref_graph} and \code{graphlet_counts_ref}. +#' (bin indexes) and \code{max_graphlet_size} as arguments. If +#' \code{bin_counts_fn} is \code{NULL}, (default), it will apply either the +#' approach from the original Netdis paper, or the respective Geometric-Poisson +#' approximation; depending on the values of \code{ref_graph} and +#' \code{graphlet_counts_ref}. #' #' @param exp_counts_fn Function used to map from binned reference counts to #' expected counts for each graphlet in each ego network of the query graphs. #' Takes \code{ego_networks}, \code{density_bin_breaks}, #' \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments. #' If \code{exp_counts_fn} is \code{NULL}, (default), it will apply -#' either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -#' values of \code{ref_graph} and \code{graphlet_counts_ref}. +#' either the approach from the original Netdis paper, or the respective +#' Geometric-Poisson approximation; depending on the values of \code{ref_graph} +#' and \code{graphlet_counts_ref}. #' #' @return Netdis statistics between graph_1 and graph_2 for graphlet sizes #' up to and including max_graphlet_size. @@ -82,25 +102,49 @@ #' @examples #' require(netdist) #' require(igraph) -#' # Set source directory for Virus PPI graph edge files stored in the netdist package. 
-#' source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") +#' # Set source directory for Virus PPI graph edge files stored in the +#' # netdist package. +#' source_dir <- system.file( +#' file.path("extdata", "VRPINS"), +#' package = "netdist" +#' ) #' # Load query graphs as igraph objects -#' graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") -#' graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") -#' -#' # Netdis variant using the Geometric Poisson approximation to remove the background expectation of each network. -#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) # This option will focus on detecting more general and global discrepancies between the ego-network structures. -#' -#' # Comparing the networks via their observed ego counts without centering them (equivalent to using expectation equal to zero). This option, will focus on detecting small discrepancies. +#' graph_1 <- read_simple_graph( +#' file.path(source_dir, "EBV.txt"), +#' format = "ncol" +#' ) +#' graph_2 <- read_simple_graph( +#' file.path(source_dir, "ECL.txt"), +#' format = "ncol" +#' ) +#' +#' # Netdis variant using the Geometric Poisson approximation to remove the +#' # background expectation of each network. This option will focus on detecting +#' # more general and global discrepancies between the ego-network structures. +#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) +#' +#' # Comparing the networks via their observed ego counts without centering them +#' # (equivalent to using expectation equal to zero). This option, will focus on +#' # detecting small discrepancies. #' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = 0) #' -#' # Example of the use of netdis with a reference graph.This option will focus on detecting discrepancies between the networks relative to the ego-network structure of the reference network / gold-standard. 
+#' # Example of the use of netdis with a reference graph.This option will focus +#' # on detecting discrepancies between the networks relative to the ego-network +#' # structure of the reference network / gold-standard. #' # Two lattice networks of different sizes are used for this example. #' goldstd_1 <- graph.lattice(c(8, 8)) # A reference net #' goldstd_2 <- graph.lattice(c(44, 44)) # A reference net #' -#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_1) -#' netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_2) +#' netdis_one_to_one( +#' graph_1 = graph_1, +#' graph_2 = graph_2, +#' ref_graph = goldstd_1 +#' ) +#' netdis_one_to_one( +#' graph_1 = graph_1, +#' graph_2 = graph_2, +#' ref_graph = goldstd_2 +#' ) #' #' #' # Providing pre-calculated subgraph counts. @@ -111,14 +155,30 @@ #' props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) #' #' # Netdis Geometric-Poisson. -#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = NULL) +#' netdis_one_to_one( +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' ref_graph = NULL +#' ) #' #' # Netdis Zero Expectation. 
-#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = 0) +#' netdis_one_to_one( +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' ref_graph = 0 +#' ) #' #' # Netdis using gold-standard network -#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_1) -#' netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_2) +#' netdis_one_to_one( +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' graphlet_counts_ref = props_goldstd_1 +#' ) +#' netdis_one_to_one( +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' graphlet_counts_ref = props_goldstd_2 +#' ) #' @export netdis_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, @@ -226,7 +286,8 @@ netdis_one_to_one <- function(graph_1 = NULL, #' 3) NULL - Expected counts will be calculated based on the properties of the #' query graphs themselves. #' -#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 and 5 are supported. +#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 +#' and 5 are supported. #' #' @param neighbourhood_size Ego network neighbourhood size. #' @@ -236,25 +297,31 @@ netdis_one_to_one <- function(graph_1 = NULL, #' @param min_ego_edges Filter ego networks which have fewer #' than min_ego_edges edges. #' -#' @param binning_fn Function used to bin ego network densities. Takes edge \code{densities} -#' as its single argument, and returns a named list including, the input \code{densities}, the resulting bin \code{breaks} (vector of density bin limits), and the vector \code{interval_indexes} which states to what bin each of the individual elements in \code{densities} belongs to. -#' ego network). 
If \code{NULL}, then the method \code{binned_densities_adaptive} with -#' \code{min_counts_per_interval = 5} and \code{num_intervals = 100} is used -#' (Default: NULL). +#' @param binning_fn Function used to bin ego network densities. Takes edge +#' \code{densities} as its single argument, and returns a named list including, +#' the input \code{densities}, the resulting bin \code{breaks} (vector of +#' density bin limits), and the vector \code{interval_indexes} which states to +#' what bin each of the individual elements in \code{densities} belongs to. +#' ego network). If \code{NULL}, then the method +#' \code{binned_densities_adaptive} with \code{min_counts_per_interval = 5} and +#' \code{num_intervals = 100} is used (Default: NULL). #' #' @param bin_counts_fn Function used to calculate expected graphlet counts in #' each density bin. Takes \code{graphlet_counts}, \code{interval_indexes} -#' (bin indexes) and \code{max_graphlet_size} as arguments. If \code{bin_counts_fn} is \code{NULL}, (default), -#' it will apply either the approach from the original Netdis paper, or the respective Geometric-Poisson -#' approximation; depending on the values of \code{ref_graph} and \code{graphlet_counts_ref}. +#' (bin indexes) and \code{max_graphlet_size} as arguments. If +#' \code{bin_counts_fn} is \code{NULL}, (default), it will apply either the +#' approach from the original Netdis paper, or the respective Geometric-Poisson +#' approximation; depending on the values of \code{ref_graph} and +#' \code{graphlet_counts_ref}. #' #' @param exp_counts_fn Function used to map from binned reference counts to #' expected counts for each graphlet in each ego network of the query graphs. #' Takes \code{ego_networks}, \code{density_bin_breaks}, #' \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments. 
#' If \code{exp_counts_fn} is \code{NULL}, (default), it will apply -#' either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -#' values of \code{ref_graph} and \code{graphlet_counts_ref}. +#' either the approach from the original Netdis paper, or the respective +#' Geometric-Poisson approximation; depending on the values of \code{ref_graph} +#' and \code{graphlet_counts_ref}. #' #' #' @param graphlet_counts_1 Pre-generated graphlet counts for the first query @@ -377,14 +444,15 @@ netdis_one_to_many <- function(graph_1 = NULL, #' graphs. #' 2) A simplified \code{igraph} object - used as a reference graph from which #' expected counts are calculated for all query graphs. -#' 3) NULL (default) - Expected counts will be calculated based on the properties of the -#' query graphs themselves. (Geometric-Poisson approximation). +#' 3) NULL (default) - Expected counts will be calculated based on the +#' properties of the query graphs themselves. (Geometric-Poisson approximation). #' #' @param comparisons Which comparisons to perform between graphs. #' Can be "many-to-many" (all pairwise combinations) or "one-to-many" #' (compare first graph in graphs to all other graphs.) #' -#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 (default) and 5 are supported. +#' @param max_graphlet_size Generate graphlets up to this size. Currently only 4 +#' (default) and 5 are supported. #' #' @param neighbourhood_size Ego network neighbourhood size (default 2). #' @@ -394,24 +462,31 @@ netdis_one_to_many <- function(graph_1 = NULL, #' @param min_ego_edges Filter ego networks which have fewer #' than min_ego_edges edges (default 1). #' -#' @param binning_fn Function used to bin ego network densities. 
Takes edge \code{densities} -#' as its single argument, and returns a named list including, the input \code{densities}, the resulting bin \code{breaks} (vector of density bin limits), and the vector \code{interval_indexes} which states to what bin each of the individual elements in \code{densities} belongs to. -#' ego network). If \code{NULL}, then the method \code{binned_densities_adaptive} with -#' \code{min_counts_per_interval = 5} and \code{num_intervals = 100} is used (default: NULL). +#' @param binning_fn Function used to bin ego network densities. Takes edge +#' \code{densities} as its single argument, and returns a named list including, +#' the input \code{densities}, the resulting bin \code{breaks} (vector of +#' density bin limits), and the vector \code{interval_indexes} which states to +#' what bin each of the individual elements in \code{densities} belongs to. +#' ego network). If \code{NULL}, then the method +#' \code{binned_densities_adaptive} with \code{min_counts_per_interval = 5} and +#' \code{num_intervals = 100} is used (default: NULL). #' #' @param bin_counts_fn Function used to calculate expected graphlet counts in #' each density bin. Takes \code{graphlet_counts}, \code{interval_indexes} -#' (bin indexes) and \code{max_graphlet_size} as arguments. If \code{bin_counts_fn} is \code{NULL}, (default), -#' it will apply either the approach from the original Netdis paper, or the respective Geometric-Poisson -#' approximation; depending on the values of \code{ref_graph} and \code{graphlet_counts_ref}. +#' (bin indexes) and \code{max_graphlet_size} as arguments. +#' If \code{bin_counts_fn} is \code{NULL}, (default), it will apply either the +#' approach from the original Netdis paper, or the respective Geometric-Poisson +#' approximation; depending on the values of \code{ref_graph} and +#' \code{graphlet_counts_ref}. 
#' #' @param exp_counts_fn Function used to map from binned reference counts to #' expected counts for each graphlet in each ego network of the query graphs. #' Takes \code{ego_networks}, \code{density_bin_breaks}, #' \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments. #' If \code{exp_counts_fn} is \code{NULL}, (default), it will apply -#' either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -#' values of \code{ref_graph} and \code{graphlet_counts_ref}. +#' either the approach from the original Netdis paper, or the respective +#' Geometric-Poisson approximation; depending on the values of \code{ref_graph} +#' and \code{graphlet_counts_ref}. #' #' @param graphlet_counts Pre-generated graphlet counts (default: NULL). If the #' \code{graphlet_counts} argument is defined then \code{graphs} will not be @@ -423,14 +498,14 @@ netdis_one_to_many <- function(graph_1 = NULL, #' additional column labelled "N" including the node count for #' each ego network. #' -#' @param graphlet_counts_ref Pre-generated reference graphlet counts (default: NULL). Matrix containing counts -#' of each graphlet (columns) for each ego-network (rows) in the input graph. Columns are labelled with -#' graphlet IDs and rows are labelled with the ID of the central node in each -#' ego-network. As well as graphlet counts, each matrix must contain an -#' additional column labelled "N" including the node count for -#' each ego network. -#' If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not -#' be used. +#' @param graphlet_counts_ref Pre-generated reference graphlet counts (default: +#' NULL). Matrix containing counts of each graphlet (columns) for each +#' ego-network (rows) in the input graph. Columns are labelled with graphlet IDs +#' and rows are labelled with the ID of the central node in each ego-network. 
As +#' well as graphlet counts, each matrix must contain an additional column +#' labelled "N" including the node count for each ego network. +#' If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} +#' will not be used. #' #' @return Netdis statistics between query graphs for graphlet sizes #' up to and including max_graphlet_size. @@ -513,8 +588,9 @@ netdis_many_to_many <- function(graphs = NULL, ## ------------------------------------------------------------------------ # Centred counts - # If there are no graphlet_counts_ref, and a number has been passed as ref_graph, treat it as a constant expected - # counts value (e.g. if ref_graph = 0 then no centring of counts). + # If there are no graphlet_counts_ref, and a number has been passed as + # ref_graph, treat it as a constant expected counts value (e.g. if + # ref_graph = 0 then no centring of counts). if (is.numeric(ref_graph) && length(ref_graph) == 1 && is.null(graphlet_counts_ref)) { centred_graphlet_counts <- purrr::map( graphlet_counts, @@ -528,8 +604,8 @@ netdis_many_to_many <- function(graphs = NULL, ) ## ------------------------------------------------------------------------ - # If there are no graphlet_counts_ref, and If a reference graph passed, use it to calculate expected counts for all - # query graphs. + # If there are no graphlet_counts_ref, and If a reference graph passed, use + # it to calculate expected counts for all query graphs. } else if (!is.null(ref_graph) || !is.null(graphlet_counts_ref)) { # Generate ego networks and calculate graphlet counts @@ -575,7 +651,8 @@ netdis_many_to_many <- function(graphs = NULL, ## ------------------------------------------------------------------------ # If no reference passed, calculate expected counts using query networks - # themselves. Geometric-Poisson GP #This is the function that creates an error for a graph with three connected nodes. + # themselves. 
Geometric-Poisson GP #This is the function that creates an + # error for a graph with three connected nodes. } else { centred_graphlet_counts <- purrr::map( graphlet_counts, @@ -623,15 +700,18 @@ netdis_many_to_many <- function(graphs = NULL, #' Calculate Netdis statistic between two graphs from their Centred Graphlet #' Counts (generated using \code{netdis_centred_graphlet_counts}) for graphlets #' of size \code{graphlet_size}. -#' @param centred_graphlet_count_vector_1 Centred Graphlet Counts vector for graph 1 -#' @param centred_graphlet_count_vector_2 Centred Graphlet Counts vector for graph 2 +#' @param centred_graphlet_count_vector_1 Centred Graphlet Counts vector for +#' graph 1 +#' @param centred_graphlet_count_vector_2 Centred Graphlet Counts vector for +#' graph 2 #' @param graphlet_size The size of graphlets to use for the Netdis calculation #' (only counts for graphlets of the specified size will be used). The size of #' a graphlet is the number of nodes it contains. #' @return Netdis statistic calculated using centred counts for graphlets of #' the specified size #' @export -netdis <- function(centred_graphlet_count_vector_1, centred_graphlet_count_vector_2, +netdis <- function(centred_graphlet_count_vector_1, + centred_graphlet_count_vector_2, graphlet_size) { # Select subset of centred counts corresponding to graphlets of the # specified size @@ -655,15 +735,19 @@ netdis <- function(centred_graphlet_count_vector_1, centred_graphlet_count_vecto #' Calculate Netdis statistic between two graphs from their Centred Graphlet #' Counts (generated using \code{netdis_centred_graphlet_counts}) for all #' graphlet sizes up to \code{max_graphlet_size}. 
-#' @param centred_graphlet_count_vector_1 Centred Graphlet Counts vector for graph 1 -#' @param centred_graphlet_count_vector_2 Centred Graphlet Counts vector for graph 2 +#' @param centred_graphlet_count_vector_1 Centred Graphlet Counts vector for +#' graph 1 +#' @param centred_graphlet_count_vector_2 Centred Graphlet Counts vector for +#' graph 2 #' @param max_graphlet_size max graphlet size to calculate Netdis for. #' The size of a graphlet is the number of nodes it contains. Netdis is -#' calculated for all graphlets from size 3 to size max_graphlet_size. Currently only 4 and 5 are supported. +#' calculated for all graphlets from size 3 to size max_graphlet_size. Currently +#' only 4 and 5 are supported. #' @return Netdis statistic calculated using centred counts for graphlets of #' the specified size #' @export -netdis_uptok <- function(centred_graphlet_count_vector_1, centred_graphlet_count_vector_2, +netdis_uptok <- function(centred_graphlet_count_vector_1, + centred_graphlet_count_vector_2, max_graphlet_size) { if ((max_graphlet_size > 5) | (max_graphlet_size < 3)) { stop("max_graphlet_size must be 3, 4 or 5.") @@ -709,8 +793,8 @@ netdis_uptok <- function(centred_graphlet_count_vector_1, centred_graphlet_count #' @param binning_fn Function used to bin ego network densities. Only needed if #' \code{ref_ego_density_bins} and \code{ref_binned_graphlet_counts} are #' \code{NULL}. Takes densities as its single argument, and returns a named list -#' including keys \code{breaks} (vector of bin edges) and \code{interval_indexes} -#' (density bin index for each ego network). +#' including keys \code{breaks} (vector of bin edges) and +#' \code{interval_indexes} (density bin index for each ego network). #' #' @param bin_counts_fn Function used to calculate expected graphlet counts in #' each density bin. 
Only needed if \code{ref_ego_density_bins} and @@ -723,7 +807,8 @@ netdis_uptok <- function(centred_graphlet_count_vector_1, centred_graphlet_count #' Takes \code{ego_networks}, \code{density_bin_breaks}, #' \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments. #' -#' @param max_graphlet_size max graphlet size to calculate centred counts for. Currently only size 4 and 5 are supported. +#' @param max_graphlet_size max graphlet size to calculate centred counts for. +#' Currently only size 4 and 5 are supported. #' #' @return graphlet_counts minus exp_graphlet_counts for graphlets up to size #' max_graphlet_size. @@ -797,8 +882,8 @@ netdis_centred_graphlet_counts <- function(graphlet_counts, stop("Invalid combination of ref_ego_density_bins and ref_binned_graphlet_counts. Options are: - Both NULL: calculate expected counts using query network. - - Vector of bin edges and matrix of binned counts: Reference graph values - for calculating expected counts. + - Vector of bin edges and matrix of binned counts: Reference graph + values for calculating expected counts. - Constant numeric ref_binned_graphlet_counts: Use as constant expected counts value.") } @@ -821,7 +906,8 @@ netdis_centred_graphlet_counts <- function(graphlet_counts, #' nummber of ego networks (rows). #' @param exp_graphlet_counts Matrix of expected graphlet counts (columns) for a #' nummber of ego networks (rows). -#' @param max_graphlet_size Do the subtraction for graphlets up to this size. Currently only size 4 and 5 are supported. +#' @param max_graphlet_size Do the subtraction for graphlets up to this size. +#' Currently only size 4 and 5 are supported. #' @export netdis_subtract_exp_counts <- function(graphlet_counts, exp_graphlet_counts, @@ -847,7 +933,8 @@ netdis_subtract_exp_counts <- function(graphlet_counts, #' @param density_binned_reference_counts Reference network graphlet counts for #' each density bin. 
#' @param max_graphlet_size Determines the maximum size of graphlets to count. -#' Only graphlets containing up to \code{max_graphlet_size} nodes are counted. Currently only size 4 and 5 are supported. +#' Only graphlets containing up to \code{max_graphlet_size} nodes are counted. +#' Currently only size 4 and 5 are supported. #' @param scale_fn Optional function to scale calculated expected counts, taking #' \code{graphlet_counts} and \code{max_graphlet_size} as arguments, #' and returning a scale factor that the looked up @@ -882,7 +969,8 @@ netdis_expected_counts <- function(graphlet_counts, #' #' @param graphlet_counts Node and graphlet counts for an ego network. #' @param max_graphlet_size Determines the maximum size of graphlets to count. -#' Only graphlets containing up to \code{max_graphlet_size} nodes are counted. Currently only size 4 and 5 are supported. +#' Only graphlets containing up to \code{max_graphlet_size} nodes are counted. +#' Currently only size 4 and 5 are supported. #' @param density_breaks Density values defining bin edges. #' @param density_binned_reference_counts Reference network graphlet counts for #' each density bin. @@ -1011,8 +1099,8 @@ density_binned_counts <- function(graphlet_counts, #' @param graphlet_counts Graphlet counts for a number of ego_networks. #' @param density_interval_indexes Density bin indexes for each ego network in #' \code{graphlet_counts}. -#' @param max_graphlet_size Determines the maximum size of graphlets. Currently only size 4 and 5 are supported. -#' included in graphlet_counts. +#' @param max_graphlet_size Determines the maximum size of graphlets. Currently +#' only size 4 and 5 are supported. included in graphlet_counts. exp_counts_bin_gp <- function(bin_idx, graphlet_counts, density_interval_indexes, max_graphlet_size) { @@ -1066,8 +1154,8 @@ exp_counts_bin_gp <- function(bin_idx, graphlet_counts, #' @param graphlet_counts Graphlet counts for a number of ego_networks. 
#' @param density_interval_indexes Density bin index for #' each ego network. -#' @param max_graphlet_size Determines the maximum size of graphlets. Currently only size 4 and 5 are supported. -#' included in graphlet_counts. +#' @param max_graphlet_size Determines the maximum size of graphlets. Currently +#' only size 4 and 5 are supported. included in graphlet_counts. #' @export density_binned_counts_gp <- function(graphlet_counts, density_interval_indexes, From ff2e549e41439ec74ab43e9c4eb778ba05e90d19 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Mon, 6 Jun 2022 22:31:28 +0100 Subject: [PATCH 61/84] styler --- R/dhist.R | 4 ++-- R/measures_net_dis.R | 52 +++++++++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/R/dhist.R b/R/dhist.R index 4ea7a237..0ff24b9a 100644 --- a/R/dhist.R +++ b/R/dhist.R @@ -479,7 +479,7 @@ dhist_variance <- function(dhist) { # is located precisely at the lcoation. Therefore cariance (i.e. E[X^2]) # is the mass-weighted sum of the mean-centred locations variance <- sum(dhist$masses * (mean_centred_locations)^2) / - sum(dhist$masses) + sum(dhist$masses) } else { # For smoothed histograms, the mass associated with each location is # "smoothed" uniformly across a bin centred on the location with @@ -558,7 +558,7 @@ normalise_dhist_variance <- function(dhist) { centred_locations <- (dhist$locations - dhist_mean_location(dhist)) normalised_centred_locations <- centred_locations / std_dev normalised_locations <- normalised_centred_locations + - dhist_mean_location(dhist) + dhist_mean_location(dhist) dhist <- update_dhist(dhist, locations = normalised_locations) # If smoothing_window_width not zero, then update it to reflect the variance # normalisation diff --git a/R/measures_net_dis.R b/R/measures_net_dis.R index 0ae66fd3..51c689b3 100644 --- a/R/measures_net_dis.R +++ b/R/measures_net_dis.R @@ -105,17 +105,17 @@ #' # Set source directory for Virus PPI graph edge files stored in the #' # 
netdist package. #' source_dir <- system.file( -#' file.path("extdata", "VRPINS"), -#' package = "netdist" +#' file.path("extdata", "VRPINS"), +#' package = "netdist" #' ) #' # Load query graphs as igraph objects #' graph_1 <- read_simple_graph( -#' file.path(source_dir, "EBV.txt"), -#' format = "ncol" +#' file.path(source_dir, "EBV.txt"), +#' format = "ncol" #' ) #' graph_2 <- read_simple_graph( -#' file.path(source_dir, "ECL.txt"), -#' format = "ncol" +#' file.path(source_dir, "ECL.txt"), +#' format = "ncol" #' ) #' #' # Netdis variant using the Geometric Poisson approximation to remove the @@ -136,14 +136,14 @@ #' goldstd_2 <- graph.lattice(c(44, 44)) # A reference net #' #' netdis_one_to_one( -#' graph_1 = graph_1, -#' graph_2 = graph_2, -#' ref_graph = goldstd_1 +#' graph_1 = graph_1, +#' graph_2 = graph_2, +#' ref_graph = goldstd_1 #' ) #' netdis_one_to_one( -#' graph_1 = graph_1, -#' graph_2 = graph_2, -#' ref_graph = goldstd_2 +#' graph_1 = graph_1, +#' graph_2 = graph_2, +#' ref_graph = goldstd_2 #' ) #' #' @@ -156,28 +156,28 @@ #' #' # Netdis Geometric-Poisson. #' netdis_one_to_one( -#' graphlet_counts_1 = props_1, -#' graphlet_counts_2 = props_2, -#' ref_graph = NULL +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' ref_graph = NULL #' ) #' #' # Netdis Zero Expectation. 
#' netdis_one_to_one( -#' graphlet_counts_1 = props_1, -#' graphlet_counts_2 = props_2, -#' ref_graph = 0 +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' ref_graph = 0 #' ) #' #' # Netdis using gold-standard network #' netdis_one_to_one( -#' graphlet_counts_1 = props_1, -#' graphlet_counts_2 = props_2, -#' graphlet_counts_ref = props_goldstd_1 +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' graphlet_counts_ref = props_goldstd_1 #' ) #' netdis_one_to_one( -#' graphlet_counts_1 = props_1, -#' graphlet_counts_2 = props_2, -#' graphlet_counts_ref = props_goldstd_2 +#' graphlet_counts_1 = props_1, +#' graphlet_counts_2 = props_2, +#' graphlet_counts_ref = props_goldstd_2 #' ) #' @export netdis_one_to_one <- function(graph_1 = NULL, @@ -591,7 +591,9 @@ netdis_many_to_many <- function(graphs = NULL, # If there are no graphlet_counts_ref, and a number has been passed as # ref_graph, treat it as a constant expected counts value (e.g. if # ref_graph = 0 then no centring of counts). 
- if (is.numeric(ref_graph) && length(ref_graph) == 1 && is.null(graphlet_counts_ref)) { + if (is.numeric(ref_graph) && + length(ref_graph) == 1 && + is.null(graphlet_counts_ref)) { centred_graphlet_counts <- purrr::map( graphlet_counts, netdis_centred_graphlet_counts, From 8726fe10b8c9f9d90ed626409d2026aa79468eb3 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Mon, 6 Jun 2022 21:34:34 +0000 Subject: [PATCH 62/84] Update documentation --- man/as_smoothed_dhist.Rd | 8 +- man/as_unsmoothed_dhist.Rd | 4 +- man/density_binned_counts_gp.Rd | 4 +- man/dhist.Rd | 13 +-- man/dhist_from_obs.Rd | 6 +- man/dhist_from_obs_slow.Rd | 6 +- man/exp_counts_bin_gp.Rd | 4 +- man/is_dhist.Rd | 4 +- man/is_numeric_vector_1d.Rd | 3 +- man/netdis.Rd | 6 +- man/netdis_centred_graphlet_counts.Rd | 7 +- man/netdis_expected_counts.Rd | 3 +- man/netdis_expected_counts_ego.Rd | 3 +- man/netdis_many_to_many.Rd | 48 +++++---- man/netdis_one_to_many.Rd | 29 +++--- man/netdis_one_to_one.Rd | 136 +++++++++++++++++++------- man/netdis_subtract_exp_counts.Rd | 3 +- man/netdis_uptok.Rd | 9 +- 18 files changed, 194 insertions(+), 102 deletions(-) diff --git a/man/as_smoothed_dhist.Rd b/man/as_smoothed_dhist.Rd index 1dfc0c14..f81847dd 100644 --- a/man/as_smoothed_dhist.Rd +++ b/man/as_smoothed_dhist.Rd @@ -11,11 +11,13 @@ as_smoothed_dhist(dhist, smoothing_window_width) \item{smoothing_window_width}{If greater than 0, the discrete histogram will be treated as having the mass at each location "smoothed" uniformly across -a bin centred on the location and having width = \code{smoothing_window_width}} +a bin centred on the location and having +width = \code{smoothing_window_width}} } \value{ -A copy of a \code{dhist} object with its \code{smoothing_window_width} -attribute set to the value provided \code{smoothing_window_width} parameter. +A copy of a \code{dhist} object with its +\code{smoothing_window_width} attribute set to the value provided +\code{smoothing_window_width} parameter. 
} \description{ Returns a "smoothed" copy of a \code{dhist} object with its diff --git a/man/as_unsmoothed_dhist.Rd b/man/as_unsmoothed_dhist.Rd index d4304810..4d2c250c 100644 --- a/man/as_unsmoothed_dhist.Rd +++ b/man/as_unsmoothed_dhist.Rd @@ -10,8 +10,8 @@ as_unsmoothed_dhist(dhist) \item{dhist}{A discrete histogram as a \code{dhist} object} } \value{ -A copy of a \code{dhist} object with its \code{smoothing_window_width} -attribute set to 0. +A copy of a \code{dhist} object with its +\code{smoothing_window_width} attribute set to 0. } \description{ Returns an "unsmoothed" copy of a \code{dhist} object with its diff --git a/man/density_binned_counts_gp.Rd b/man/density_binned_counts_gp.Rd index 0dcd867e..cf722709 100644 --- a/man/density_binned_counts_gp.Rd +++ b/man/density_binned_counts_gp.Rd @@ -17,8 +17,8 @@ density_binned_counts_gp( \item{density_interval_indexes}{Density bin index for each ego network.} -\item{max_graphlet_size}{Determines the maximum size of graphlets. Currently only size 4 and 5 are supported. -included in graphlet_counts.} +\item{max_graphlet_size}{Determines the maximum size of graphlets. Currently +only size 4 and 5 are supported. included in graphlet_counts.} } \description{ Calculate expected counts in density bins using the diff --git a/man/dhist.Rd b/man/dhist.Rd index 4566df12..701f3385 100644 --- a/man/dhist.Rd +++ b/man/dhist.Rd @@ -15,8 +15,8 @@ location} \item{smoothing_window_width}{If greater than 0, the discrete histogram will be treated as having the mass at each location "smoothed" uniformly across -a bin centred on the location and having width = \code{smoothing_window_width} -(default = \code{0} - no smoothing)} +a bin centred on the location and having +width = \code{smoothing_window_width} (default = \code{0} - no smoothing)} \item{sorted}{Whether or not to return a discrete histogram with locations and masses sorted by ascending mass (default = \code{TRUE})} @@ -26,13 +26,14 @@ A sparse discrete histogram. 
Format is a \code{dhist} object, which is a list of class \code{dhist} with the following named elements: \itemize{ \item \code{locations}: A 1D numeric vector of discrete locations - \item \code{masses}: A 1D numeric vector of the mass present at each location + \item \code{masses}: A 1D numeric vector of the mass present at each + location } Note that locations where no mass is present are not included in the returned \code{dhist} object. Mass in these discrete histograms is treated as being -present precisely at the specified location. Discrete histograms should not be used -for data where observations have been grouped into bins representing ranges -of observation values. +present precisely at the specified location. Discrete histograms should not +be used for data where observations have been grouped into bins representing +ranges of observation values. } \description{ Creates a discrete histogram object of class \code{dhist}, with bin diff --git a/man/dhist_from_obs.Rd b/man/dhist_from_obs.Rd index 584da0b8..f2df2ffd 100644 --- a/man/dhist_from_obs.Rd +++ b/man/dhist_from_obs.Rd @@ -14,9 +14,11 @@ A sparse discrete histogram. Format is a \code{dhist} object, which is a list of class \code{dhist} with the following named elements: \itemize{ \item \code{locations}: A 1D numeric vector of discrete locations - \item \code{masses}: A 1D numeric vector of the mass present at each location + \item \code{masses}: A 1D numeric vector of the mass present at each + location } } \description{ -Generate a sparse discrete histogram from a set of discrete numeric observations +Generate a sparse discrete histogram from a set of discrete numeric +observations } diff --git a/man/dhist_from_obs_slow.Rd b/man/dhist_from_obs_slow.Rd index 39afc81e..e40b78fb 100644 --- a/man/dhist_from_obs_slow.Rd +++ b/man/dhist_from_obs_slow.Rd @@ -14,9 +14,11 @@ A sparse discrete histogram. 
Format is a \code{dhist} object, which is a list of class \code{dhist} with the following named elements: \itemize{ \item \code{locations}: A 1D numeric vector of discrete locations - \item \code{masses}: A 1D numeric vector of the mass present at each location + \item \code{masses}: A 1D numeric vector of the mass present at each + location } } \description{ -Generate a sparse discrete histogram from a set of discrete numeric observations +Generate a sparse discrete histogram from a set of discrete numeric +observations } diff --git a/man/exp_counts_bin_gp.Rd b/man/exp_counts_bin_gp.Rd index dc3bc3c5..4ed9ea90 100644 --- a/man/exp_counts_bin_gp.Rd +++ b/man/exp_counts_bin_gp.Rd @@ -22,8 +22,8 @@ exp_counts_bin_gp( \item{density_interval_indexes}{Density bin indexes for each ego network in \code{graphlet_counts}.} -\item{max_graphlet_size}{Determines the maximum size of graphlets. Currently only size 4 and 5 are supported. -included in graphlet_counts.} +\item{max_graphlet_size}{Determines the maximum size of graphlets. Currently +only size 4 and 5 are supported. included in graphlet_counts.} } \description{ INTERNAL FUNCTION - DO NOT CALL DIRECTLY diff --git a/man/is_dhist.Rd b/man/is_dhist.Rd index 1f550d6f..114e35e0 100644 --- a/man/is_dhist.Rd +++ b/man/is_dhist.Rd @@ -15,8 +15,8 @@ is set to \code{dhist} (default = \code{TRUE})} } \description{ Checks if the input object is of class \code{dhist}. If \code{fast_check} is -\code{TRUE} then the only check is whether the object has a class attribute of -\code{dhist}. If \code{fast_check} is \code{FALSE} (default), then checks +\code{TRUE} then the only check is whether the object has a class attribute +of \code{dhist}. If \code{fast_check} is \code{FALSE} (default), then checks are also made to ensure that the object has the structure required of a \code{dhist} object. 
} diff --git a/man/is_numeric_vector_1d.Rd b/man/is_numeric_vector_1d.Rd index 02f41145..9ad1cdd5 100644 --- a/man/is_numeric_vector_1d.Rd +++ b/man/is_numeric_vector_1d.Rd @@ -15,7 +15,8 @@ TRUE if input is a 1D numeric vector. FALSE otherwise. \description{ Check if a variable is a 1D numeric vector by checking that: \itemize{ - \item \code{is_numeric(input)}: Input is vector, matrix, array or list of numbers + \item \code{is_numeric(input)}: Input is vector, matrix, array or list of + numbers \item \code{is_null(dim(input))}: Input is not a matrix or array } } diff --git a/man/netdis.Rd b/man/netdis.Rd index cc091357..6f71662a 100644 --- a/man/netdis.Rd +++ b/man/netdis.Rd @@ -11,9 +11,11 @@ netdis( ) } \arguments{ -\item{centred_graphlet_count_vector_1}{Centred Graphlet Counts vector for graph 1} +\item{centred_graphlet_count_vector_1}{Centred Graphlet Counts vector for +graph 1} -\item{centred_graphlet_count_vector_2}{Centred Graphlet Counts vector for graph 2} +\item{centred_graphlet_count_vector_2}{Centred Graphlet Counts vector for +graph 2} \item{graphlet_size}{The size of graphlets to use for the Netdis calculation (only counts for graphlets of the specified size will be used). The size of diff --git a/man/netdis_centred_graphlet_counts.Rd b/man/netdis_centred_graphlet_counts.Rd index eb3779cd..91b0ac40 100644 --- a/man/netdis_centred_graphlet_counts.Rd +++ b/man/netdis_centred_graphlet_counts.Rd @@ -30,8 +30,8 @@ or a constant numeric value to subtract from all graphlet counts.} \item{binning_fn}{Function used to bin ego network densities. Only needed if \code{ref_ego_density_bins} and \code{ref_binned_graphlet_counts} are \code{NULL}. 
Takes densities as its single argument, and returns a named list -including keys \code{breaks} (vector of bin edges) and \code{interval_indexes} -(density bin index for each ego network).} +including keys \code{breaks} (vector of bin edges) and +\code{interval_indexes} (density bin index for each ego network).} \item{bin_counts_fn}{Function used to calculate expected graphlet counts in each density bin. Only needed if \code{ref_ego_density_bins} and @@ -44,7 +44,8 @@ expected counts for each graphlet in each ego network of the query graphs. Takes \code{ego_networks}, \code{density_bin_breaks}, \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments.} -\item{max_graphlet_size}{max graphlet size to calculate centred counts for. Currently only size 4 and 5 are supported.} +\item{max_graphlet_size}{max graphlet size to calculate centred counts for. +Currently only size 4 and 5 are supported.} } \value{ graphlet_counts minus exp_graphlet_counts for graphlets up to size diff --git a/man/netdis_expected_counts.Rd b/man/netdis_expected_counts.Rd index f9ee0968..eed67af7 100644 --- a/man/netdis_expected_counts.Rd +++ b/man/netdis_expected_counts.Rd @@ -22,7 +22,8 @@ nummber of ego networks (rows).} each density bin.} \item{max_graphlet_size}{Determines the maximum size of graphlets to count. -Only graphlets containing up to \code{max_graphlet_size} nodes are counted. Currently only size 4 and 5 are supported.} +Only graphlets containing up to \code{max_graphlet_size} nodes are counted. 
+Currently only size 4 and 5 are supported.} \item{scale_fn}{Optional function to scale calculated expected counts, taking \code{graphlet_counts} and \code{max_graphlet_size} as arguments, diff --git a/man/netdis_expected_counts_ego.Rd b/man/netdis_expected_counts_ego.Rd index b565ecab..d2935c27 100644 --- a/man/netdis_expected_counts_ego.Rd +++ b/man/netdis_expected_counts_ego.Rd @@ -17,7 +17,8 @@ netdis_expected_counts_ego( \item{graphlet_counts}{Node and graphlet counts for an ego network.} \item{max_graphlet_size}{Determines the maximum size of graphlets to count. -Only graphlets containing up to \code{max_graphlet_size} nodes are counted. Currently only size 4 and 5 are supported.} +Only graphlets containing up to \code{max_graphlet_size} nodes are counted. +Currently only size 4 and 5 are supported.} \item{density_breaks}{Density values defining bin edges.} diff --git a/man/netdis_many_to_many.Rd b/man/netdis_many_to_many.Rd index 12f5c057..9313d7e5 100644 --- a/man/netdis_many_to_many.Rd +++ b/man/netdis_many_to_many.Rd @@ -29,14 +29,15 @@ obtained by using \code{read_simple_graphs}.} graphs. 2) A simplified \code{igraph} object - used as a reference graph from which expected counts are calculated for all query graphs. -3) NULL (default) - Expected counts will be calculated based on the properties of the -query graphs themselves. (Geometric-Poisson approximation).} +3) NULL (default) - Expected counts will be calculated based on the +properties of the query graphs themselves. (Geometric-Poisson approximation).} \item{comparisons}{Which comparisons to perform between graphs. Can be "many-to-many" (all pairwise combinations) or "one-to-many" (compare first graph in graphs to all other graphs.)} -\item{max_graphlet_size}{Generate graphlets up to this size. Currently only 4 (default) and 5 are supported.} +\item{max_graphlet_size}{Generate graphlets up to this size. 
Currently only 4 +(default) and 5 are supported.} \item{neighbourhood_size}{Ego network neighbourhood size (default 2).} @@ -46,24 +47,31 @@ than min_ego_nodes nodes (default 3).} \item{min_ego_edges}{Filter ego networks which have fewer than min_ego_edges edges (default 1).} -\item{binning_fn}{Function used to bin ego network densities. Takes edge \code{densities} -as its single argument, and returns a named list including, the input \code{densities}, the resulting bin \code{breaks} (vector of density bin limits), and the vector \code{interval_indexes} which states to what bin each of the individual elements in \code{densities} belongs to. -ego network). If \code{NULL}, then the method \code{binned_densities_adaptive} with -\code{min_counts_per_interval = 5} and \code{num_intervals = 100} is used (default: NULL).} +\item{binning_fn}{Function used to bin ego network densities. Takes edge +\code{densities} as its single argument, and returns a named list including, +the input \code{densities}, the resulting bin \code{breaks} (vector of +density bin limits), and the vector \code{interval_indexes} which states to +what bin each of the individual elements in \code{densities} belongs to. +ego network). If \code{NULL}, then the method +\code{binned_densities_adaptive} with \code{min_counts_per_interval = 5} and +\code{num_intervals = 100} is used (default: NULL).} \item{bin_counts_fn}{Function used to calculate expected graphlet counts in each density bin. Takes \code{graphlet_counts}, \code{interval_indexes} -(bin indexes) and \code{max_graphlet_size} as arguments. If \code{bin_counts_fn} is \code{NULL}, (default), -it will apply either the approach from the original Netdis paper, or the respective Geometric-Poisson -approximation; depending on the values of \code{ref_graph} and \code{graphlet_counts_ref}.} +(bin indexes) and \code{max_graphlet_size} as arguments. 
+If \code{bin_counts_fn} is \code{NULL}, (default), it will apply either the +approach from the original Netdis paper, or the respective Geometric-Poisson +approximation; depending on the values of \code{ref_graph} and +\code{graphlet_counts_ref}.} \item{exp_counts_fn}{Function used to map from binned reference counts to expected counts for each graphlet in each ego network of the query graphs. Takes \code{ego_networks}, \code{density_bin_breaks}, \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments. If \code{exp_counts_fn} is \code{NULL}, (default), it will apply -either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -values of \code{ref_graph} and \code{graphlet_counts_ref}.} +either the approach from the original Netdis paper, or the respective +Geometric-Poisson approximation; depending on the values of \code{ref_graph} +and \code{graphlet_counts_ref}.} \item{graphlet_counts}{Pre-generated graphlet counts (default: NULL). If the \code{graphlet_counts} argument is defined then \code{graphs} will not be @@ -75,14 +83,14 @@ ego-network. As well as graphlet counts, each matrix must contain an additional column labelled "N" including the node count for each ego network.} -\item{graphlet_counts_ref}{Pre-generated reference graphlet counts (default: NULL). Matrix containing counts -of each graphlet (columns) for each ego-network (rows) in the input graph. Columns are labelled with -graphlet IDs and rows are labelled with the ID of the central node in each -ego-network. As well as graphlet counts, each matrix must contain an -additional column labelled "N" including the node count for -each ego network. -If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not -be used.} +\item{graphlet_counts_ref}{Pre-generated reference graphlet counts (default: +NULL). Matrix containing counts of each graphlet (columns) for each +ego-network (rows) in the input graph. 
Columns are labelled with graphlet IDs +and rows are labelled with the ID of the central node in each ego-network. As +well as graphlet counts, each matrix must contain an additional column +labelled "N" including the node count for each ego network. +If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} +will not be used.} } \value{ Netdis statistics between query graphs for graphlet sizes diff --git a/man/netdis_one_to_many.Rd b/man/netdis_one_to_many.Rd index c50cd4f4..bfc2344a 100644 --- a/man/netdis_one_to_many.Rd +++ b/man/netdis_one_to_many.Rd @@ -35,7 +35,8 @@ expected counts are calculated for all query graphs. 3) NULL - Expected counts will be calculated based on the properties of the query graphs themselves.} -\item{max_graphlet_size}{Generate graphlets up to this size. Currently only 4 and 5 are supported.} +\item{max_graphlet_size}{Generate graphlets up to this size. Currently only 4 +and 5 are supported.} \item{neighbourhood_size}{Ego network neighbourhood size.} @@ -45,25 +46,31 @@ than min_ego_nodes nodes.} \item{min_ego_edges}{Filter ego networks which have fewer than min_ego_edges edges.} -\item{binning_fn}{Function used to bin ego network densities. Takes edge \code{densities} -as its single argument, and returns a named list including, the input \code{densities}, the resulting bin \code{breaks} (vector of density bin limits), and the vector \code{interval_indexes} which states to what bin each of the individual elements in \code{densities} belongs to. -ego network). If \code{NULL}, then the method \code{binned_densities_adaptive} with -\code{min_counts_per_interval = 5} and \code{num_intervals = 100} is used -(Default: NULL).} +\item{binning_fn}{Function used to bin ego network densities. 
Takes edge +\code{densities} as its single argument, and returns a named list including, +the input \code{densities}, the resulting bin \code{breaks} (vector of +density bin limits), and the vector \code{interval_indexes} which states to +what bin each of the individual elements in \code{densities} belongs to. +ego network). If \code{NULL}, then the method +\code{binned_densities_adaptive} with \code{min_counts_per_interval = 5} and +\code{num_intervals = 100} is used (Default: NULL).} \item{bin_counts_fn}{Function used to calculate expected graphlet counts in each density bin. Takes \code{graphlet_counts}, \code{interval_indexes} -(bin indexes) and \code{max_graphlet_size} as arguments. If \code{bin_counts_fn} is \code{NULL}, (default), - it will apply either the approach from the original Netdis paper, or the respective Geometric-Poisson - approximation; depending on the values of \code{ref_graph} and \code{graphlet_counts_ref}.} +(bin indexes) and \code{max_graphlet_size} as arguments. If +\code{bin_counts_fn} is \code{NULL}, (default), it will apply either the +approach from the original Netdis paper, or the respective Geometric-Poisson +approximation; depending on the values of \code{ref_graph} and +\code{graphlet_counts_ref}.} \item{exp_counts_fn}{Function used to map from binned reference counts to expected counts for each graphlet in each ego network of the query graphs. Takes \code{ego_networks}, \code{density_bin_breaks}, \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments. 
If \code{exp_counts_fn} is \code{NULL}, (default), it will apply -either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -values of \code{ref_graph} and \code{graphlet_counts_ref}.} +either the approach from the original Netdis paper, or the respective +Geometric-Poisson approximation; depending on the values of \code{ref_graph} +and \code{graphlet_counts_ref}.} \item{graphlet_counts_1}{Pre-generated graphlet counts for the first query graph. If the \code{graphlet_counts_1} argument is defined then diff --git a/man/netdis_one_to_one.Rd b/man/netdis_one_to_one.Rd index 8f3b5208..63474c02 100644 --- a/man/netdis_one_to_one.Rd +++ b/man/netdis_one_to_one.Rd @@ -21,19 +21,29 @@ netdis_one_to_one( ) } \arguments{ -\item{graph_1}{A simple graph object from the \code{igraph} package. \code{graph_1} can be set to \code{NULL} (default) if \code{graphlet_counts_1} is provided. If both \code{graph_1} and \code{graphlet_counts_1} are not \code{NULL}, then only \code{graphlet_counts_1} will be considered.} - -\item{graph_2}{A simple graph object from the \code{igraph} package. \code{graph_2} can be set to \code{NULL} (default) if \code{graphlet_counts_2} is provided. If both \code{graph_2} and \code{graphlet_counts_2} are not \code{NULL}, then only \code{graphlet_counts_2} will be considered.} +\item{graph_1}{A simple graph object from the \code{igraph} package. +\code{graph_1} can be set to \code{NULL} (default) if +\code{graphlet_counts_1} is provided. If both \code{graph_1} and +\code{graphlet_counts_1} are not \code{NULL}, then only +\code{graphlet_counts_1} will be considered.} + +\item{graph_2}{A simple graph object from the \code{igraph} package. +\code{graph_2} can be set to \code{NULL} (default) if +\code{graphlet_counts_2} is provided. 
If both \code{graph_2} and +\code{graphlet_counts_2} are not \code{NULL}, then only +\code{graphlet_counts_2} will be considered.} \item{ref_graph}{Controls how expected counts are calculated. Either: 1) A numeric value - used as a constant expected counts value for all query graphs . 2) A simplified \code{igraph} object - used as a reference graph from which expected counts are calculated for all query graphs. -3) NULL (Default) - Used for Netdis-GP, where the expected counts will be calculated based on the properties of the -query graphs themselves. (Geometric-Poisson approximation).} +3) NULL (Default) - Used for Netdis-GP, where the expected counts will be +calculated based on the properties of the query graphs themselves +(Geometric-Poisson approximation).} -\item{max_graphlet_size}{Generate graphlets up to this size. Currently only 4 (default) and 5 are supported.} +\item{max_graphlet_size}{Generate graphlets up to this size. Currently only +4 (default) and 5 are supported.} \item{neighbourhood_size}{Ego network neighborhood size (default: 2).} @@ -43,25 +53,31 @@ than min_ego_nodes nodes (default: 3).} \item{min_ego_edges}{Filter ego networks which have fewer than min_ego_edges edges (default: 1).} -\item{binning_fn}{Function used to bin ego network densities. Takes edge \code{densities} -as its single argument, and returns a named list including, the input \code{densities}, the resulting bin \code{breaks} (vector of density bin limits), and the vector \code{interval_indexes} which states to what bin each of the individual elements in \code{densities} belongs to. -ego network). If \code{NULL}, then the method \code{binned_densities_adaptive} with -\code{min_counts_per_interval = 5} and \code{num_intervals = 100} is used -(Default: NULL).} +\item{binning_fn}{Function used to bin ego network densities. 
Takes edge +\code{densities} as its single argument, and returns a named list including, +the input \code{densities}, the resulting bin \code{breaks} (vector of +density bin limits), and the vector \code{interval_indexes} which states to +what bin each of the individual elements in \code{densities} belongs to. +ego network). If \code{NULL}, then the method +\code{binned_densities_adaptive} with \code{min_counts_per_interval = 5} +and \code{num_intervals = 100} is used (Default: NULL).} \item{bin_counts_fn}{Function used to calculate expected graphlet counts in each density bin. Takes \code{graphlet_counts}, \code{interval_indexes} -(bin indexes) and \code{max_graphlet_size} as arguments. If \code{bin_counts_fn} is \code{NULL}, (default), it will apply -either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -values of \code{ref_graph} and \code{graphlet_counts_ref}.} +(bin indexes) and \code{max_graphlet_size} as arguments. If +\code{bin_counts_fn} is \code{NULL}, (default), it will apply either the +approach from the original Netdis paper, or the respective Geometric-Poisson +approximation; depending on the values of \code{ref_graph} and +\code{graphlet_counts_ref}.} \item{exp_counts_fn}{Function used to map from binned reference counts to expected counts for each graphlet in each ego network of the query graphs. Takes \code{ego_networks}, \code{density_bin_breaks}, \code{binned_graphlet_counts}, and \code{max_graphlet_size} as arguments. 
If \code{exp_counts_fn} is \code{NULL}, (default), it will apply -either the approach from the original Netdis paper, or the respective Geometric-Poisson approximation; depending on the -values of \code{ref_graph} and \code{graphlet_counts_ref}.} +either the approach from the original Netdis paper, or the respective +Geometric-Poisson approximation; depending on the values of \code{ref_graph} +and \code{graphlet_counts_ref}.} \item{graphlet_counts_1}{Pre-generated graphlet counts for the first query graph. Matrix containing counts of each graphlet (columns) for @@ -69,9 +85,9 @@ each ego-network (rows) in the input graph. Columns are labelled with graphlet IDs and rows are labelled with the ID of the central node in each ego-network. As well as graphlet counts, each matrix must contain an additional column labelled "N" including the node count for -each ego network. (default: NULL). -If the \code{graphlet_counts_1} argument is defined then -\code{graph_1} will not be used. These counts can be obtained with \code{count_graphlets_ego}.} +each ego network. If the \code{graphlet_counts_1} argument is defined then +\code{graph_1} will not be used. These counts can be obtained with +\code{count_graphlets_ego}. (default: NULL).} \item{graphlet_counts_2}{Pre-generated graphlet counts for the second query graph. Matrix containing counts of each graphlet (columns) for @@ -81,7 +97,8 @@ ego-network. As well as graphlet counts, each matrix must contain an additional column labelled "N" including the node count for each ego network. (default: NULL). If the \code{graphlet_counts_2} argument is defined then -\code{graph_2} will not be used. These counts can be obtained with \code{count_graphlets_ego}.} +\code{graph_2} will not be used. These counts can be obtained with +\code{count_graphlets_ego}.} \item{graphlet_counts_ref}{Pre-generated reference graphlet counts. 
Matrix containing counts of each graphlet (columns) for @@ -89,39 +106,66 @@ each ego-network (rows) in the reference graph. Columns are labelled with graphlet IDs and rows are labelled with the ID of the central node in each ego-network. As well as graphlet counts, each matrix must contain an additional column labelled "N" including the node count for -each ego network. (default: NULL). -If the \code{graphlet_counts_ref} argument is defined then \code{ref_graph} will not -be used.} +each ego network. If the \code{graphlet_counts_ref} argument is defined then +\code{ref_graph} will not be used. (default: NULL).} } \value{ Netdis statistics between graph_1 and graph_2 for graphlet sizes up to and including max_graphlet_size. } \description{ -Calculates the different variants of the network dissimilarity statistic Netdis between two graphs. The variants currently supported are Netdis using a gold-standard network, Netdis using no expecations (\code{ref_graph = 0}), and Netdis using a Geometric Poisson approximation for the expectation (\code{ref_graph = NULL}). +Calculates the different variants of the network dissimilarity statistic +Netdis between two graphs. The variants currently supported are Netdis using +a gold-standard network, Netdis using no expecations (\code{ref_graph = 0}), +and Netdis using a Geometric Poisson approximation for the expectation +(\code{ref_graph = NULL}). } \examples{ require(netdist) require(igraph) -# Set source directory for Virus PPI graph edge files stored in the netdist package. -source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") +# Set source directory for Virus PPI graph edge files stored in the +# netdist package. 
+source_dir <- system.file( + file.path("extdata", "VRPINS"), + package = "netdist" +) # Load query graphs as igraph objects -graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") -graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") +graph_1 <- read_simple_graph( + file.path(source_dir, "EBV.txt"), + format = "ncol" +) +graph_2 <- read_simple_graph( + file.path(source_dir, "ECL.txt"), + format = "ncol" +) -# Netdis variant using the Geometric Poisson approximation to remove the background expectation of each network. -netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) # This option will focus on detecting more general and global discrepancies between the ego-network structures. +# Netdis variant using the Geometric Poisson approximation to remove the +# background expectation of each network. This option will focus on detecting +# more general and global discrepancies between the ego-network structures. +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) -# Comparing the networks via their observed ego counts without centering them (equivalent to using expectation equal to zero). This option, will focus on detecting small discrepancies. +# Comparing the networks via their observed ego counts without centering them +# (equivalent to using expectation equal to zero). This option, will focus on +# detecting small discrepancies. netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = 0) -# Example of the use of netdis with a reference graph.This option will focus on detecting discrepancies between the networks relative to the ego-network structure of the reference network / gold-standard. +# Example of the use of netdis with a reference graph.This option will focus +# on detecting discrepancies between the networks relative to the ego-network +# structure of the reference network / gold-standard. 
# Two lattice networks of different sizes are used for this example. goldstd_1 <- graph.lattice(c(8, 8)) # A reference net goldstd_2 <- graph.lattice(c(44, 44)) # A reference net -netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_1) -netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_2) +netdis_one_to_one( + graph_1 = graph_1, + graph_2 = graph_2, + ref_graph = goldstd_1 +) +netdis_one_to_one( + graph_1 = graph_1, + graph_2 = graph_2, + ref_graph = goldstd_2 +) # Providing pre-calculated subgraph counts. @@ -132,12 +176,28 @@ props_goldstd_1 <- count_graphlets_ego(graph = goldstd_1) props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) # Netdis Geometric-Poisson. -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = NULL) +netdis_one_to_one( + graphlet_counts_1 = props_1, + graphlet_counts_2 = props_2, + ref_graph = NULL +) # Netdis Zero Expectation. -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = 0) +netdis_one_to_one( + graphlet_counts_1 = props_1, + graphlet_counts_2 = props_2, + ref_graph = 0 +) # Netdis using gold-standard network -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_1) -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_2) +netdis_one_to_one( + graphlet_counts_1 = props_1, + graphlet_counts_2 = props_2, + graphlet_counts_ref = props_goldstd_1 +) +netdis_one_to_one( + graphlet_counts_1 = props_1, + graphlet_counts_2 = props_2, + graphlet_counts_ref = props_goldstd_2 +) } diff --git a/man/netdis_subtract_exp_counts.Rd b/man/netdis_subtract_exp_counts.Rd index c03aa42f..3540c12f 100644 --- a/man/netdis_subtract_exp_counts.Rd +++ b/man/netdis_subtract_exp_counts.Rd @@ -17,7 +17,8 @@ nummber of ego networks (rows).} \item{exp_graphlet_counts}{Matrix of expected graphlet counts (columns) for a nummber 
of ego networks (rows).} -\item{max_graphlet_size}{Do the subtraction for graphlets up to this size. Currently only size 4 and 5 are supported.} +\item{max_graphlet_size}{Do the subtraction for graphlets up to this size. +Currently only size 4 and 5 are supported.} } \description{ Subtract expected graphlet counts from actual graphlet counts. diff --git a/man/netdis_uptok.Rd b/man/netdis_uptok.Rd index 3ab881c9..98ddf371 100644 --- a/man/netdis_uptok.Rd +++ b/man/netdis_uptok.Rd @@ -11,13 +11,16 @@ netdis_uptok( ) } \arguments{ -\item{centred_graphlet_count_vector_1}{Centred Graphlet Counts vector for graph 1} +\item{centred_graphlet_count_vector_1}{Centred Graphlet Counts vector for +graph 1} -\item{centred_graphlet_count_vector_2}{Centred Graphlet Counts vector for graph 2} +\item{centred_graphlet_count_vector_2}{Centred Graphlet Counts vector for +graph 2} \item{max_graphlet_size}{max graphlet size to calculate Netdis for. The size of a graphlet is the number of nodes it contains. Netdis is -calculated for all graphlets from size 3 to size max_graphlet_size. Currently only 4 and 5 are supported.} +calculated for all graphlets from size 3 to size max_graphlet_size. 
Currently +only 4 and 5 are supported.} } \value{ Netdis statistic calculated using centred counts for graphlets of From 644ef1110d70978a0be0702331d6b1363cc54f52 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Tue, 7 Jun 2022 12:17:01 +0100 Subject: [PATCH 63/84] fix line length R and tests directories --- R/PlottingFunctions.R | 85 +- R/RcppExports.R | 8 +- R/emd.R | 58 +- R/graph_binning.R | 4 +- R/measures_net_emd.R | 289 +++-- R/net_emd_speed_benchmark.R | 14 +- R/orca_interface.R | 52 +- tests/testthat/test_dhist.R | 1165 ++++++++++-------- tests/testthat/test_emd.R | 21 +- tests/testthat/test_graph_binning.R | 185 ++- tests/testthat/test_measures_net_dis.R | 1157 ++++++++++-------- tests/testthat/test_measures_net_emd.R | 167 ++- tests/testthat/test_orca_interface.R | 1484 +++++++++++------------ tests/testthat/test_utility_functions.R | 216 +++- 14 files changed, 2824 insertions(+), 2081 deletions(-) diff --git a/R/PlottingFunctions.R b/R/PlottingFunctions.R index b4f9dbf8..5219dee3 100644 --- a/R/PlottingFunctions.R +++ b/R/PlottingFunctions.R @@ -1,28 +1,55 @@ #' Heatmap of Netdis many-to-many comparisons #' -#' Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. +#' Provides a heatmap and dendrogram for the network comparisons via +#' \code{pheatmap}. #' #' @param netdislist Default output of \code{netdis_many_to_many}. #' -#' @param whatrow Selection of the row in \code{netdis_many_to_many$comp_spec} to be used for plotting. +#' @param whatrow Selection of the row in \code{netdis_many_to_many$comp_spec} +#' to be used for plotting. #' -#' @param clustering_method Clustering method as allowed in the \code{pheatmap} function from the \code{pheatmap} package. The dendrogram will appear if \code{docluster} is TRUE (default). +#' @param clustering_method Clustering method as allowed in the \code{pheatmap} +#' function from the \code{pheatmap} package. The dendrogram will appear if +#' \code{docluster} is TRUE (default). 
#' #' @param main Title of the plot. #' -#' @param docluster controls the order of the rows and columns. If TRUE (default) the rows and columns will be reordered to create the dendrogram. If FALSE, then only the heatmap is drawn. +#' @param docluster controls the order of the rows and columns. If TRUE +#' (default) the rows and columns will be reordered to create the dendrogram. If +#' FALSE, then only the heatmap is drawn. #' -#' @return Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. +#' @return Provides a heatmap and dendrogram for the network comparisons via +#' \code{pheatmap}. #' @export -netdis.plot <- function(netdislist, whatrow = c(1, 2)[2], clustering_method = "ward.D", main = "Nedis", docluster = TRUE) { - adjmat <- cross_comp_to_matrix(measure = netdislist$netdis[whatrow, ], cross_comparison_spec = netdislist$comp_spec) +netdis.plot <- function(netdislist, + whatrow = c(1, 2)[2], + clustering_method = "ward.D", + main = "Nedis", + docluster = TRUE) { + adjmat <- cross_comp_to_matrix( + measure = netdislist$netdis[whatrow, ], + cross_comparison_spec = netdislist$comp_spec + ) vnames <- rownames(adjmat) legend1 <- seq(min(adjmat), max(adjmat), length.out = 5) levels1 <- round(legend1, digits = 2) - pheatmap::pheatmap(mat = as.dist(adjmat), cluster_rows = docluster, cluster_cols = docluster, clustering_method = clustering_method, angle_col = 45, main = main, treeheight_row = 80, labels_row = vnames, labels_col = vnames, display_numbers = TRUE, legend_breaks = legend1, legend_labels = levels1) + pheatmap::pheatmap( + mat = as.dist(adjmat), + cluster_rows = docluster, + cluster_cols = docluster, + clustering_method = clustering_method, + angle_col = 45, + main = main, + treeheight_row = 80, + labels_row = vnames, + labels_col = vnames, + display_numbers = TRUE, + legend_breaks = legend1, + legend_labels = levels1 + ) } @@ -30,26 +57,52 @@ netdis.plot <- function(netdislist, whatrow = c(1, 2)[2], clustering_method = "w #' 
Heatmap of NetEmd many-to-many comparisons #' -#' Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. +#' Provides a heatmap and dendrogram for the network comparisons via +#' \code{pheatmap}. #' #' @param netdislist Default output of \code{netdis_many_to_many}. #' -#' @param whatrow Selection of the row in \code{netdis_many_to_many$comp_spec} to be used for plotting. +#' @param whatrow Selection of the row in \code{netdis_many_to_many$comp_spec} +#' to be used for plotting. #' -#' @param clustering_method Clustering method as allowed in the \code{pheatmap} function from the \code{pheatmap} package. The dendrogram will appear if \code{docluster} is TRUE (default). +#' @param clustering_method Clustering method as allowed in the \code{pheatmap} +#' function from the \code{pheatmap} package. The dendrogram will appear if +#' \code{docluster} is TRUE (default). #' #' @param main Title of the plot. #' -#' @param docluster controls the order of the rows and columns. If TRUE (default) the rows and columns will be reordered to create the dendrogram. If FALSE, then only the heatmap is drawn. +#' @param docluster controls the order of the rows and columns. If TRUE +#' (default) the rows and columns will be reordered to create the dendrogram. If +#' FALSE, then only the heatmap is drawn. #' -#' @return Provides a heat map and dendrogram for the network comparisons via \code{pheatmap}. +#' @return Provides a heat map and dendrogram for the network comparisons via +#' \code{pheatmap}. 
#' @export -netemd.plot <- function(netemdlist, clustering_method = "ward.D", main = "NetEmd", docluster = TRUE) { - adjmat <- cross_comp_to_matrix(measure = netemdlist$netemds, cross_comparison_spec = netemdlist$comp_spec) +netemd.plot <- function(netemdlist, + clustering_method = "ward.D", + main = "NetEmd", + docluster = TRUE) { + adjmat <- cross_comp_to_matrix( + measure = netemdlist$netemds, + cross_comparison_spec = netemdlist$comp_spec + ) vnames <- rownames(adjmat) legend1 <- seq(min(adjmat), max(adjmat), length.out = 5) levels1 <- round(legend1, digits = 2) - pheatmap::pheatmap(mat = as.dist(adjmat), cluster_rows = docluster, cluster_cols = docluster, clustering_method = clustering_method, angle_col = 45, main = main, treeheight_row = 80, labels_row = vnames, labels_col = vnames, display_numbers = TRUE, legend_breaks = legend1, legend_labels = levels1) + pheatmap::pheatmap( + mat = as.dist(adjmat), + cluster_rows = docluster, + cluster_cols = docluster, + clustering_method = clustering_method, + angle_col = 45, + main = main, + treeheight_row = 80, + labels_row = vnames, + labels_col = vnames, + display_numbers = TRUE, + legend_breaks = legend1, + legend_labels = levels1 + ) } diff --git a/R/RcppExports.R b/R/RcppExports.R index b7d0c2dc..f1932442 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -22,6 +22,12 @@ counts_from_observations <- function(features) { #' #' @export emd_fast_no_smoothing <- function(locations1, values1, locations2, values2) { - .Call(`_netdist_emd_fast_no_smoothing`, locations1, values1, locations2, values2) + .Call( + `_netdist_emd_fast_no_smoothing`, + locations1, + values1, + locations2, + values2 + ) } diff --git a/R/emd.R b/R/emd.R index da99cb36..02208756 100644 --- a/R/emd.R +++ b/R/emd.R @@ -9,8 +9,8 @@ #' offsets for each pair of histograms. Default is "optimise" to use #' R's built-in \code{stats::optimise} method to efficiently find the offset #' with the minimal EMD. 
However, this is not guaranteed to find the global -#' minimum if multiple local minima EMDs exist. You can alternatively specify the -#' "exhaustive" method, which will exhaustively evaluate the EMD between the +#' minimum if multiple local minima EMDs exist. You can alternatively specify +#' the "exhaustive" method, which will exhaustively evaluate the EMD between the #' histograms at all offsets that are candidates for the minimal EMD. #' @return Earth Mover's Distance between the two discrete histograms #' @export @@ -43,7 +43,8 @@ min_emd <- function(dhist1, dhist2, method = "optimise") { #' @export min_emd_optimise_fast <- function(dhist1, dhist2) { # Can we run the C++ fast implementation (only works with no smoothing)? - if ((dhist1$smoothing_window_width == 0) && (dhist2$smoothing_window_width == 0)) { + if ((dhist1$smoothing_window_width == 0) && + (dhist2$smoothing_window_width == 0)) { # Determine minimum and maximum offset of range in which histograms overlap # (based on sliding histogram 1) min_offset <- min(dhist2$locations) - max(dhist1$locations) @@ -147,10 +148,10 @@ min_emd_optimise <- function(dhist1, dhist2) { #' to ensure that the offset with the global minimum EMD is found. #' #' This is because of the piece-wise linear nature of the two ECMFs. Between any -#' two offsets where knots from the two ECMFs align, EMD will be either constant, -#' or uniformly increasing or decreasing. Therefore, there the EMD between two -#' sets of aligned knots cannot be smaller than the EMD at one or other of the -#' bounding offsets. +#' two offsets where knots from the two ECMFs align, EMD will be either +#' constant, or uniformly increasing or decreasing. Therefore, there the EMD +#' between two sets of aligned knots cannot be smaller than the EMD at one or +#' other of the bounding offsets. 
#' @param dhist1 A \code{dhist} discrete histogram object #' @param dhist2 A \code{dhist} discrete histogram object #' @return Earth Mover's Distance between the two discrete histograms @@ -240,15 +241,16 @@ emd <- function(dhist1, dhist2) { #' Distance between the two histograms by summing the absolute difference #' between the two cumulative histograms. #' @references -#' Calculation of the Wasserstein Distance Between Probability Distributions on the Line -#' S. S. Vallender, Theory of Probability & Its Applications 1974 18:4, 784-786 -#' \url{http://dx.doi.org/10.1137/1118101} +#' Calculation of the Wasserstein Distance Between Probability Distributions on +#' the Line S. S. Vallender, Theory of Probability & Its Applications 1974 18:4, +#' 784-786 \url{http://dx.doi.org/10.1137/1118101} #' @param dhist1 A discrete histogram as a \code{dhist} object #' @param dhist2 A discrete histogram as a \code{dhist} object #' @return Earth Mover's Distance between the two input histograms #' @export emd_cs <- function(dhist1, dhist2) { - # Generate Empirical Cumulative Mass Functions (ECMFs) for each discrete histogram + # Generate Empirical Cumulative Mass Functions (ECMFs) for each discrete + # histogram ecmf1 <- dhist_ecmf(dhist1) ecmf2 <- dhist_ecmf(dhist2) # Calculate the area between the two ECMFs @@ -278,10 +280,16 @@ emd_lp <- function(bin_masses1, bin_masses2, bin_centres1, bin_centres2) { # the bin_mass and bin_centre vectors for each histogram must have the same # length. 
if (length(bin_centres1) != num_bins1) { - stop("Number of bin masses and bin centres provided for histogram 1 must be equal") + stop( + "Number of bin masses and bin centres provided for histogram 1 must ", + "be equal" + ) } if (length(bin_centres2) != num_bins2) { - stop("Number of bin masses and bin centres provided for histogram 2 must be equal") + stop( + "Number of bin masses and bin centres provided for histogram 2 must ", + "be equal" + ) } # Generate cost matrix @@ -290,10 +298,16 @@ emd_lp <- function(bin_masses1, bin_masses2, bin_centres1, bin_centres2) { # Linear Programming solver requires all bin masses and transportation costs # to be integers to generate correct answer if (!isTRUE(all.equal(bin_masses1, floor(bin_masses1)))) { - stop("All bin masses for histogram 1 must be integers for accurate Linear Programming solution") + stop( + "All bin masses for histogram 1 must be integers for accurate Linear ", + "Programming solution" + ) } if (!isTRUE(all.equal(bin_masses2, floor(bin_masses2)))) { - stop("All bin masses for histogram 2 must be integers for accurate Linear Programming solution") + stop( + "All bin masses for histogram 2 must be integers for accurate ", + "Linear Programming solution" + ) } if (!isTRUE(all.equal(cost_mat, floor(cost_mat)))) { stop("All costs must be integers for accurate Linear Programming solution") @@ -319,8 +333,18 @@ cost_matrix <- function(bin_centres1, bin_centres2) { # Calculate distances between all bins in network 1 and all bins in network 2 num_bins1 <- length(bin_centres1) num_bins2 <- length(bin_centres2) - loc_mat1 <- matrix(bin_centres1, nrow = num_bins1, ncol = num_bins2, byrow = FALSE) - loc_mat2 <- matrix(bin_centres2, nrow = num_bins1, ncol = num_bins2, byrow = TRUE) + loc_mat1 <- matrix( + bin_centres1, + nrow = num_bins1, + ncol = num_bins2, + byrow = FALSE + ) + loc_mat2 <- matrix( + bin_centres2, + nrow = num_bins1, + ncol = num_bins2, + byrow = TRUE + ) cost_mat <- abs(loc_mat1 - loc_mat2) 
return(cost_mat) } diff --git a/R/graph_binning.R b/R/graph_binning.R index 482fc524..e75da79b 100644 --- a/R/graph_binning.R +++ b/R/graph_binning.R @@ -10,7 +10,9 @@ binned_densities_adaptive <- function(densities, min_counts_per_interval, num_intervals) { - if (length(densities) < min_counts_per_interval) min_counts_per_interval <- length(densities) + if (length(densities) < min_counts_per_interval) { + min_counts_per_interval <- length(densities) + } breaks <- adaptive_breaks(densities, min_count = min_counts_per_interval, breaks = num_intervals diff --git a/R/measures_net_emd.R b/R/measures_net_emd.R index ddcd1592..e1688ec1 100755 --- a/R/measures_net_emd.R +++ b/R/measures_net_emd.R @@ -1,23 +1,37 @@ #' NetEMD Network Earth Mover's Distance between a pair of networks. #' #' Calculates the network Earth Mover's Distance (EMD) between -#' two sets of network features. This is done by individually normalising the distribution -#' of each feature so that they have unit mass and unit variance. Then the minimun EMD between the same pair of features (one for each corresponding graph) is calculated by considering all possible translations of the feature distributions. Finally the average over all features is reported. +#' two sets of network features. This is done by individually normalising the +#' distribution of each feature so that they have unit mass and unit variance. +#' Then the minimun EMD between the same pair of features (one for each +#' corresponding graph) is calculated by considering all possible translations +#' of the feature distributions. Finally the average over all features is +#' reported. #' This is calculated as follows: #' 1. Normalise each feature histogram to have unit mass and unit variance. -#' 2. For each feature, find the minimum EMD between each pair of histograms considering all possible histogram translations. +#' 2. 
For each feature, find the minimum EMD between each pair of histograms +#' considering all possible histogram translations. #' 3. Take the average minimum EMD across all features. -#' @param graph_1 A network/graph object from the \code{igraph} package. \code{graph_1} can be set to \code{NULL} (default) if \code{dhists_1} is provided. -#' @param graph_2 A network/graph object from the \code{igraph} package. \code{graph_2} can be set to \code{NULL} (default) if \code{dhists_2} is provided. -#' @param dhists_1 Either, a \code{dhist} discrete histogram object, or list of such objects, or a matrix of network features (each column representing a feature). \code{dhists_1} can be set to \code{NULL} (default) if \code{graph_1} is provided. A \code{dhist} object can be obtained from \code{graph_features_to_histograms}. +#' @param graph_1 A network/graph object from the \code{igraph} package. +#' \code{graph_1} can be set to \code{NULL} (default) if \code{dhists_1} is +#' provided. +#' @param graph_2 A network/graph object from the \code{igraph} package. +#' \code{graph_2} can be set to \code{NULL} (default) if \code{dhists_2} is +#' provided. +#' @param dhists_1 Either, a \code{dhist} discrete histogram object, or list of +#' such objects, or a matrix of network features (each column representing a +#' feature). \code{dhists_1} can be set to \code{NULL} (default) if +#' \code{graph_1} is provided. A \code{dhist} object can be obtained from +#' \code{graph_features_to_histograms}. #' @param dhists_2 Same as \code{dhists_1}. -#' @param method The method to be used to find the minimum EMD across all potential -#' offsets for each pair of histograms. Default is "optimise" to use +#' @param method The method to be used to find the minimum EMD across all +#' potential offsets for each pair of histograms. Default is "optimise" to use #' R's built-in \code{stats::optimise} method to efficiently find the offset #' with the minimal EMD. 
However, this is not guaranteed to find the global -#' minimum if multiple local minima EMDs exist. You can alternatively specify the -#' "exhaustive" method, which will exhaustively evaluate the EMD between the -#' histograms at all offsets that are candidates for the minimal EMD at the cost of computational time. +#' minimum if multiple local minima EMDs exist. You can alternatively specify +#' the "exhaustive" method, which will exhaustively evaluate the EMD between the +#' histograms at all offsets that are candidates for the minimal EMD at the cost +#' of computational time. #' @param return_details Logical indicating whether to return the individual #' minimal EMDs and associated offsets for all pairs of histograms. #' @param smoothing_window_width Width of "top-hat" smoothing window to apply to @@ -26,29 +40,40 @@ #' \code{smoothing_window_width} that is appropriate for the discrete domain #' (e.g.for the integer domain a width of 1 is the natural choice). #' @param feature_type Type of graphlet-based feature to count: "graphlet" -#' counts the number of graphlets each node participates in; "orbit" (default) calculates -#' the number of graphlet orbits each node participates in. +#' counts the number of graphlets each node participates in; "orbit" (default) +#' calculates the number of graphlet orbits each node participates in. #' @param max_graphlet_size Determines the maximum size of graphlets to count. #' Only graphlets containing up to \code{max_graphlet_size} nodes will be #' counted. Possible values are 4, and 5 (default). #' @param ego_neighbourhood_size The number of steps from the source node to -#' include nodes for each ego-network. NetEmd was proposed for individual nodes alone, hence the default value is 0. -#' @return NetEMD measure for the two sets of discrete histograms (or graphs). 
If -#' (\code{return_details = FALSE}) then a list with the following named elements is returned -#' \code{net_emd}: the NetEMD for the set of histogram pairs (or graphs), \code{min_emds}: -#' the minimal EMD for each pair of histograms, \code{min_offsets}: the associated -#' offsets giving the minimal EMD for each pair of histograms +#' include nodes for each ego-network. NetEmd was proposed for individual nodes +#' alone, hence the default value is 0. +#' @return NetEMD measure for the two sets of discrete histograms (or graphs). +#' If (\code{return_details = FALSE}) then a list with the following named +#' elements is returned \code{net_emd}: the NetEMD for the set of histogram +#' pairs (or graphs), \code{min_emds}: the minimal EMD for each pair of +#' histograms, \code{min_offsets}: the associated offsets giving the minimal EMD +#' for each pair of histograms #' @examples #' require(igraph) #' graph_1 <- graph.lattice(c(8, 8)) #' graph_2 <- graph.lattice(c(44, 44)) -#' netemd_one_to_one(graph_1 = graph_1, graph_2 = graph_2, feature_type = "orbit", max_graphlet_size = 5) +#' netemd_one_to_one( +#' graph_1 = graph_1, +#' graph_2 = graph_2, +#' feature_type = "orbit", +#' max_graphlet_size = 5 +#' ) #' #' # Providing a matrix of network features #' props_a <- count_orbits_per_node(graph = graph_1, max_graphlet_size = 5) #' props_b <- count_orbits_per_node(graph = graph_2, max_graphlet_size = 5) #' -#' netemd_one_to_one(dhists_1 = props_a, dhists_2 = props_b, smoothing_window_width = 1) +#' netemd_one_to_one( +#' dhists_1 = props_a, +#' dhists_2 = props_b, +#' smoothing_window_width = 1 +#' ) #' #' # Providing the network features as lists of dhist objects #' dhists_1 <- graph_features_to_histograms(props_a) @@ -59,22 +84,59 @@ #' #' # A variation of NetEmd: Using the Laplacian spectrum #' # Laplacian -#' Lapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = FALSE, sparse = FALSE) -#' Lapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = 
FALSE, sparse = FALSE) +#' Lapg_1 <- igraph::laplacian_matrix( +#' graph = graph_1, +#' normalized = FALSE, +#' sparse = FALSE +#' ) +#' Lapg_2 <- igraph::laplacian_matrix( +#' graph = graph_2, +#' normalized = FALSE, +#' sparse = FALSE +#' ) #' #' # Normalized Laplacian -#' NLapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = TRUE, sparse = FALSE) -#' NLapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = TRUE, sparse = FALSE) +#' NLapg_1 <- igraph::laplacian_matrix( +#' graph = graph_1, +#' normalized = TRUE, +#' sparse = FALSE +#' ) +#' NLapg_2 <- igraph::laplacian_matrix( +#' graph = graph_2, +#' normalized = TRUE, +#' sparse = FALSE +#' ) #' #' # Spectra (This may take a couple of minutes). -#' props_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) -#' props_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) +#' props_1 <- cbind( +#' L.Spectra = eigen(Lapg_1)$values, +#' NL.Spectra = eigen(NLapg_1)$values +#' ) +#' props_2 <- cbind( +#' L.Spectra = eigen(Lapg_2)$values, +#' NL.Spectra = eigen(NLapg_2)$values +#' ) #' -#' netemd_one_to_one(dhists_1 = props_1, dhists_2 = props_2, smoothing_window_width = 0) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +#' # Use of smoothing window 1 is given for discrete integer distributions. If +#' # the network features are considered continuous variables +#' # smoothing_window_width equal to zero is recommended. 
+#' netemd_one_to_one( +#' dhists_1 = props_1, +#' dhists_2 = props_2, +#' smoothing_window_width = 0 +#' ) #' #' @export -netemd_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, dhists_1 = NULL, dhists_2 = NULL, method = "optimise", - return_details = FALSE, smoothing_window_width = 0, feature_type = "orbit", max_graphlet_size = 5, ego_neighbourhood_size = 0) { +netemd_one_to_one <- function(graph_1 = NULL, + graph_2 = NULL, + dhists_1 = NULL, + dhists_2 = NULL, + method = "optimise", + return_details = FALSE, + smoothing_window_width = 0, + feature_type = "orbit", + max_graphlet_size = 5, + ego_neighbourhood_size = 0) { ## ------------------------------------------------------------------------ # Check arguments 1 if (!igraph::is.igraph(graph_1) & is.null(dhists_1)) { @@ -85,7 +147,8 @@ netemd_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, dhists_1 = NULL, d } ## ------------------------------------------------------------------------ # Check arguments 2 - # If dhists_1 is a matrix of network features then transform them to dhist objects. + # If dhists_1 is a matrix of network features then transform them to dhist + # objects. 
if (is.matrix(dhists_1)) { dhists_1 <- graph_features_to_histograms(dhists_1) } @@ -120,7 +183,10 @@ netemd_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, dhists_1 = NULL, d ## ------------------------------------------------------------------------ # Require either a pair of "dhist" discrete histograms or two lists of "dhist" # discrete histograms - pair_of_dhist_lists <- all(purrr::map_lgl(dhists_1, is_dhist)) && all(purrr::map_lgl(dhists_2, is_dhist)) + pair_of_dhist_lists <- all( + purrr::map_lgl(dhists_1, is_dhist) + ) && + all(purrr::map_lgl(dhists_2, is_dhist)) # If input is two lists of "dhist" discrete histograms, determine the minimum # EMD and associated offset for pairs of histograms taken from the same @@ -145,7 +211,12 @@ netemd_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, dhists_1 = NULL, d # Note that the offsets represent shifts after the histograms have been # scaled to unit variance if (return_details) { - return(list(net_emd = net_emd, min_emds = min_emds, min_offsets = min_offsets, min_offsets_std = min_offsets_std)) + return(list( + net_emd = net_emd, + min_emds = min_emds, + min_offsets = min_offsets, + min_offsets_std = min_offsets_std + )) } else { return(arithmetic_mean) } @@ -164,14 +235,20 @@ netemd_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, dhists_1 = NULL, d #' NetEMDs between all graph pairs using provided Graphlet-based Degree #' Distributions -#' @param graphs A list of network/graph objects from the \code{igraph} package. \code{graphs} can be set to \code{NULL} (default) if \code{dhists} is provided. -#' @param dhists A list whose elements contain either: A list of \code{dhist} discrete histogram objects for each graph, or a list a matrix of network features (each column representing a feature). \code{dhists} can be set to \code{NULL} (default) if \code{graphs} is provided. A \code{dhist} object can be obtained from \code{graph_features_to_histograms}. 
+#' @param graphs A list of network/graph objects from the \code{igraph} package. +#' \code{graphs} can be set to \code{NULL} (default) if \code{dhists} is +#' provided. +#' @param dhists A list whose elements contain either: A list of \code{dhist} +#' discrete histogram objects for each graph, or a list a matrix of network +#' features (each column representing a feature). \code{dhists} can be set to +#' \code{NULL} (default) if \code{graphs} is provided. A \code{dhist} object +#' can be obtained from \code{graph_features_to_histograms}. #' @param method The method to use to find the minimum EMD across all potential #' offsets for each pair of histograms. Default is "optimise" to use #' R's built-in \code{stats::optimise} method to efficiently find the offset #' with the minimal EMD. However, this is not guaranteed to find the global -#' minimum if multiple local minima EMDs exist. You can alternatively specify the -#' "exhaustive" method, which will exhaustively evaluate the EMD between the +#' minimum if multiple local minima EMDs exist. You can alternatively specify +#' the "exhaustive" method, which will exhaustively evaluate the EMD between the #' histograms at all offsets that are candidates for the minimal EMD. #' @param return_details Logical indicating whether to return the individual #' minimal EMDs and associated offsets for all pairs of histograms @@ -183,27 +260,42 @@ netemd_one_to_one <- function(graph_1 = NULL, graph_2 = NULL, dhists_1 = NULL, d #' @param mc.cores Number of cores to use for parallel processing. Defaults to #' the \code{mc.cores} option set in the R environment. #' @param feature_type Type of graphlet-based feature to count: "graphlet" -#' counts the number of graphlets each node participates in; "orbit" (default) calculates -#' the number of graphlet orbits each node participates in. +#' counts the number of graphlets each node participates in; "orbit" (default) +#' calculates the number of graphlet orbits each node participates in. 
#' @param max_graphlet_size Determines the maximum size of graphlets to count. #' Only graphlets containing up to \code{max_graphlet_size} nodes will be #' counted. Possible values are 4, and 5 (default). #' @param ego_neighbourhood_size The number of steps from the source node to -#' include nodes for each ego-network. NetEmd was proposed for individual nodes alone, hence the default value is 0. +#' include nodes for each ego-network. NetEmd was proposed for individual nodes +#' alone, hence the default value is 0. #' @return NetEMD measures between all pairs of graphs for which features #' were provided. Format of returned data depends on the \code{return_details} #' parameter. If set to FALSE, a list is returned with the following named #' elements:\code{net_emd}: a vector of NetEMDs for each pair of graphs, #' \code{comp_spec}: a comparison specification table containing the graph names #' and indices within the input GDD list for each pair of graphs compared. -#' If \code{return_details} is set to FALSE, the list also contains the following -#' matrices for each graph pair: \code{min_emds}: the minimal EMD for each GDD -#' used to compute the NetEMD, \code{min_offsets}: the associated offsets giving -#' the minimal EMD for each GDD +#' If \code{return_details} is set to FALSE, the list also contains the +#' following matrices for each graph pair: \code{min_emds}: the minimal EMD for +#' each GDD used to compute the NetEMD, \code{min_offsets}: the associated +#' offsets giving the minimal EMD for each GDD #' @export -netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise", smoothing_window_width = 0, - return_details = FALSE, mc.cores = getOption("mc.cores", 2L), feature_type = "orbit", max_graphlet_size = 5, ego_neighbourhood_size = 0) { - if (max_graphlet_size > 4 & mc.cores > 1) print(paste("This function will compute orbits of graphlets up to size 5 using ", mc.cores, " cores. 
Depending on the density and size of the graphs, this may lead to a large compsumption of RAM.")) +netemd_many_to_many <- function(graphs = NULL, + dhists = NULL, + method = "optimise", + smoothing_window_width = 0, + return_details = FALSE, + mc.cores = getOption("mc.cores", 2L), + feature_type = "orbit", + max_graphlet_size = 5, + ego_neighbourhood_size = 0) { + if (max_graphlet_size > 4 & mc.cores > 1) { + print(paste( + "This function will compute orbits of graphlets up to size 5 using ", + mc.cores, + " cores. Depending on the density and size of the graphs, this may lead ", + "to a large compsumption of RAM." + )) + } # NOTE: mcapply only works on unix-like systems with system level forking # capability. This means it will work on Linux and OSX, but not Windows. @@ -219,7 +311,10 @@ netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise which_imput_type <- NULL if (!is.null(graphs) & is.null(dhists)) { if (!all((unlist(sapply(X = graphs, FUN = igraph::is.igraph))))) { - stop("Graphs need to be igraph graph objects, or a list of dhists network features should be supplied.") + stop( + "Graphs need to be igraph graph objects, or a list of dhists network ", + "features should be supplied." + ) } which_imput_type <- "Graphs" } @@ -236,14 +331,22 @@ netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise which_imput_type <- "dhist" } if (is.null(which_imput_type)) { - warning("dhists does not conform to a Matrix or dhist class for all elmenents/netwroks in the list.") + warning( + "dhists does not conform to a Matrix or dhist class for all ", + "elmenents/netwroks in the list." + ) } } ## ------------------------------------------------------------------------ # Check arguments 2 - # If dhists is a list of matrices of network features then transform them to dhist objects. + # If dhists is a list of matrices of network features then transform them to + # dhist objects. 
if (which_imput_type == "Matrix") { - dhists <- sapply(X = dhists, FUN = graph_features_to_histograms, simplify = FALSE) + dhists <- sapply( + X = dhists, + FUN = graph_features_to_histograms, + simplify = FALSE + ) } ## ------------------------------------------------------------------------ # Check arguments 3 @@ -269,22 +372,51 @@ netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise ## ------------------------------------------------------------------------ comp_spec <- cross_comparison_spec(dhists) num_features <- length(dhists[[1]]) - out <- purrr::simplify(parallel::mcmapply(function(index_a, index_b) { - netemd_one_to_one( - dhists_1 = dhists[[index_a]], dhists_2 = dhists[[index_b]], - method = method, return_details = return_details, - smoothing_window_width = smoothing_window_width + out <- purrr::simplify( + parallel::mcmapply(function(index_a, index_b) { + netemd_one_to_one( + dhists_1 = dhists[[index_a]], dhists_2 = dhists[[index_b]], + method = method, return_details = return_details, + smoothing_window_width = smoothing_window_width + ) + }, + comp_spec$index_a, comp_spec$index_b, + SIMPLIFY = FALSE, mc.cores = mc.cores ) - }, comp_spec$index_a, comp_spec$index_b, SIMPLIFY = FALSE, mc.cores = mc.cores)) + ) if (return_details) { net_emds <- purrr::simplify(purrr::map(out, ~ .$net_emd)) - min_emds <- matrix(purrr::simplify(purrr::map(out, ~ .$min_emds)), ncol = num_features, byrow = TRUE) - colnames(min_emds) <- purrr::simplify(purrr::map(1:num_features, ~ paste("MinEMD_O", . - 1, sep = ""))) - min_offsets <- matrix(purrr::simplify(purrr::map(out, ~ .$min_offsets)), ncol = num_features, byrow = TRUE) - colnames(min_offsets) <- purrr::simplify(purrr::map(1:num_features, ~ paste("MinOffsets_O", . 
- 1, sep = ""))) - min_offsets_std <- matrix(purrr::simplify(purrr::map(out, ~ .$min_offsets_std)), ncol = num_features, byrow = TRUE) - colnames(min_offsets_std) <- purrr::simplify(purrr::map(1:num_features, ~ paste("MinOffsetsStd_O", . - 1, sep = ""))) - ret <- list(netemds = net_emds, comp_spec = comp_spec, min_emds = min_emds, min_offsets = min_offsets, min_offsets_std = min_offsets_std) + min_emds <- matrix( + purrr::simplify(purrr::map(out, ~ .$min_emds)), + ncol = num_features, + byrow = TRUE + ) + colnames(min_emds) <- purrr::simplify( + purrr::map(1:num_features, ~ paste("MinEMD_O", . - 1, sep = "")) + ) + min_offsets <- matrix( + purrr::simplify(purrr::map(out, ~ .$min_offsets)), + ncol = num_features, + byrow = TRUE + ) + colnames(min_offsets) <- purrr::simplify( + purrr::map(1:num_features, ~ paste("MinOffsets_O", . - 1, sep = "")) + ) + min_offsets_std <- matrix( + purrr::simplify(purrr::map(out, ~ .$min_offsets_std)), + ncol = num_features, + byrow = TRUE + ) + colnames(min_offsets_std) <- purrr::simplify( + purrr::map(1:num_features, ~ paste("MinOffsetsStd_O", . - 1, sep = "")) + ) + ret <- list( + netemds = net_emds, + comp_spec = comp_spec, + min_emds = min_emds, + min_offsets = min_offsets, + min_offsets_std = min_offsets_std + ) } else { net_emds <- out ret <- list(netemds = net_emds, comp_spec = comp_spec) @@ -292,21 +424,25 @@ netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise return(ret) } -#' Internal function to compute the minimum Earth Mover's Distance between standarized and translated histograms +#' Internal function to compute the minimum Earth Mover's Distance between +#' standarized and translated histograms #' #' Calculates the minimum Earth Mover's Distance (EMD) between two -#' discrete histograms after normalising each histogram to unit mass and variance. +#' discrete histograms after normalising each histogram to unit mass and +#' variance. #' This is calculated as follows: #' 1. 
Normalise each histogram to have unit mass and unit variance #' 2. Find the minimum EMD between the histograms -#' @param dhists_1 A \code{dhist} discrete histogram object or a list of such objects -#' @param dhists_2 A \code{dhist} discrete histogram object or a list of such objects +#' @param dhists_1 A \code{dhist} discrete histogram object or a list of such +#' objects +#' @param dhists_2 A \code{dhist} discrete histogram object or a list of such +#' objects #' @param method The method to use to find the minimum EMD across all potential #' offsets for each pair of histograms. Default is "optimise" to use #' R's built-in \code{stats::optimise} method to efficiently find the offset #' with the minimal EMD. However, this is not guaranteed to find the global -#' minimum if multiple local minima EMDs exist. You can alternatively specify the -#' "exhaustive" method, which will exhaustively evaluate the EMD between the +#' minimum if multiple local minima EMDs exist. You can alternatively specify +#' the "exhaustive" method, which will exhaustively evaluate the EMD between the #' histograms at all offsets that are candidates for the minimal EMD. #' @param smoothing_window_width Width of "top-hat" smoothing window to apply to #' "smear" point masses across a finite width in the real domain. Default is 0, @@ -314,8 +450,10 @@ netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise #' \code{smoothing_window_width} that is appropriate for the discrete domain #' (e.g.for the integer domain a width of 1 is the natural choice) #' @return A list with the following named elements -#' \code{net_emd}: the NetEMD for the set of histogram pairs, \code{min_offsets}: the associated -#' offsets giving the minimal EMD for each pair of histograms and \code{min_offset_std}: Offset used in the standardised histograms. 
+#' \code{net_emd}: the NetEMD for the set of histogram pairs, +#' \code{min_offsets}: the associated offsets giving the minimal EMD for each +#' pair of histograms and \code{min_offset_std}: Offset used in the standardised +#' histograms. #' @examples #' require(igraph) #' goldstd_1 <- graph.lattice(c(8, 8)) @@ -325,7 +463,12 @@ netemd_many_to_many <- function(graphs = NULL, dhists = NULL, method = "optimise #' dhists_1 <- graph_features_to_histograms(props_1) #' dhists_2 <- graph_features_to_histograms(props_2) #' # Obtain the minimum NetEMD_edges between the histograms -#' netemd_single_pair(dhists_1[[1]], dhists_2[[1]], method = "optimise", smoothing_window_width = 0) +#' netemd_single_pair( +#' dhists_1[[1]], +#' dhists_2[[1]], +#' method = "optimise", +#' smoothing_window_width = 0 +#' ) #' @export netemd_single_pair <- function(dhist1, dhist2, method = "optimise", smoothing_window_width = 0) { diff --git a/R/net_emd_speed_benchmark.R b/R/net_emd_speed_benchmark.R index 62c97611..3f37ccb9 100644 --- a/R/net_emd_speed_benchmark.R +++ b/R/net_emd_speed_benchmark.R @@ -4,10 +4,11 @@ netEMDSpeedTest <- function() { print(source_dir) edge_format <- "ncol" file_pattern <- "" - # source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") - # edge_format = "ncol" - # file_pattern = ".txt" - graphs <- read_simple_graphs(source_dir = source_dir, format = edge_format, pattern = file_pattern) + graphs <- read_simple_graphs( + source_dir = source_dir, + format = edge_format, + pattern = file_pattern + ) n1 <- names(graphs) lab1 <- c() gddBuildTime <- c() @@ -26,7 +27,10 @@ netEMDSpeedTest <- function() { netEMDStart <- Sys.time() net_emd(gdd1, gdd2) endTime <- Sys.time() - gddBuildTime <- append(gddBuildTime, as.double(netEMDStart - fulltimeStart)) + gddBuildTime <- append( + gddBuildTime, + as.double(netEMDStart - fulltimeStart) + ) netEMDtime <- append(netEMDtime, as.double(endTime - netEMDStart)) } } diff --git a/R/orca_interface.R 
b/R/orca_interface.R index 51614203..10065126 100644 --- a/R/orca_interface.R +++ b/R/orca_interface.R @@ -68,11 +68,18 @@ indexed_edges_to_graph <- function(indexed_edges) { #' @return A named list of simplified igraph graph object, with the name of each #' graph set to the name of the file it was read from. #' @examples -#' # Set source directory for Virus protein-protein interaction edge files stored in the netdist package. -#' source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") +#' # Set source directory for Virus protein-protein interaction edge files +#' # stored in the netdist package. +#' source_dir <- system.file( +#' file.path("extdata", "VRPINS"), +#' package = "netdist" +#' ) #' print(source_dir) #' # Load query graphs as igraph objects -#' graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") +#' graph_1 <- read_simple_graph( +#' file.path(source_dir, "EBV.txt"), +#' format = "ncol" +#' ) #' graph_1 #' @export read_simple_graphs <- function(source_dir, @@ -208,10 +215,12 @@ simplify_graph <- function(graph, as_undirected = TRUE, remove_loops = TRUE, #' #' Converts a matrix of node level features (e.g. for example counts #' of multiple graphlets or orbits at each node) to -#' a set of histogram like objects (observed frequency distribution of each feature/column) -#' @param features_matrix A matrix whose rows represent nodes and whose columns represent different node level features. This means that entry ij provides the value of feature j for node i. -#' @return Feature histograms: List of "discrete histograms" for each -#' feature +#' a set of histogram like objects (observed frequency distribution of each +#' feature/column) +#' @param features_matrix A matrix whose rows represent nodes and whose columns +#' represent different node level features. This means that entry ij provides +#' the value of feature j for node i. 
+#' @return Feature histograms: List of "discrete histograms" for each feature #' @export graph_features_to_histograms <- function(features_matrix) { apply(features_matrix, 2, dhist_from_obs) @@ -224,8 +233,9 @@ graph_features_to_histogramsSLOW <- function(features_matrix) { #' Graphlet-based degree distributions (GDDs) #' -#' Short-cut function to create graphlet-based degree distributions from \code{igraph} graph object -#' using the ORCA fast graphlet orbit counting package. +#' Short-cut function to create graphlet-based degree distributions from +#' \code{igraph} graph object using the ORCA fast graphlet orbit counting +#' package. #' @param graph A connected, undirected, simple graph as an \code{igraph} object #' @param feature_type Type of graphlet-based feature to count: "graphlet" #' counts the number of graphlets each node participates in; "orbit" calculates @@ -233,7 +243,8 @@ graph_features_to_histogramsSLOW <- function(features_matrix) { #' @param max_graphlet_size Determines the maximum size of graphlets to count. #' Only graphlets containing up to \code{max_graphlet_size} nodes will be #' counted. Currently only size 4 and 5 are supported. -#' @param ego_neighbourhood_size The number of steps from the source node used to select the +#' @param ego_neighbourhood_size The number of steps from the source node used +#' to select the #' neighboring nodes to be included in the source node ego-network. #' @return List of graphlet-based degree distributions, with each distribution #' represented as a \code{dhist} discrete histogram object. @@ -353,8 +364,9 @@ count_graphlets_for_graph <- function(graph, max_graphlet_size) { #' @param max_graphlet_size Determines the maximum size of graphlets to count. #' Only graphlets containing up to \code{max_graphlet_size} nodes will be #' counted. Currently only size 4 (default) and 5 are supported. 
-#' @param neighbourhood_size The number of steps from the source node used to select the -#' neighboring nodes to be included in the source node ego-network. (Default 2). +#' @param neighbourhood_size The number of steps from the source node used to +#' select the neighboring nodes to be included in the source node ego-network. +#' (Default 2). #' @param min_ego_nodes Only ego networks with at least \code{min_ego_nodes} #' nodes are returned. (Default 3). #' @param min_ego_edges Only ego networks with at least \code{min_ego_edges} @@ -517,7 +529,8 @@ orbit_to_graphlet_counts <- function(orbit_counts) { #' Graphlet key #' #' Metdata about graphlet groups. -#' @param max_graphlet_size Maximum number of nodes graphlets can contain. Currently only size 2 to 5 are supported. +#' @param max_graphlet_size Maximum number of nodes graphlets can contain. +#' Currently only size 2 to 5 are supported. #' @return Metadata list with the following named fields: #' \itemize{ #' \item \code{max_nodes}: Maximum number of nodes graphlets can contain @@ -553,7 +566,8 @@ graphlet_key <- function(max_graphlet_size) { #' Orbit key #' #' Metdata about orbit groups. -#' @param max_graphlet_size Maximum number of nodes graphlets can contain. Currently only size 2 to 5 are supported. +#' @param max_graphlet_size Maximum number of nodes graphlets can contain. +#' Currently only size 2 to 5 are supported. #' @return Metadata list with the following named fields: #' \itemize{ #' \item \code{max_nodes}: Maximum number of nodes graphlets can contain @@ -611,11 +625,11 @@ graphlet_ids_for_size <- function(graphlet_size) { #' @param feature_type Type of graphlet-based degree distributions. Can be #' \code{graphlet} to count graphlets or \code{orbit} to count orbits. #' @return A named list where each element contains a set of GDDs for a single -#' @param max_graphlet_size Maximum size of graphlets to use when generating GDD. -#' Currently only size 4 and 5 are supported. 
-#' @param ego_neighbourhood_size The number of steps from the source node used to select the -#' neighboring nodes to be included in the source node ego-network. If set to 0, ego-networks will not be -#' used. +#' @param max_graphlet_size Maximum size of graphlets to use when generating +#' GDD. Currently only size 4 and 5 are supported. +#' @param ego_neighbourhood_size The number of steps from the source node used +#' to select the neighboring nodes to be included in the source node +#' ego-network. If set to 0, ego-networks will not be used. #' @param mc.cores Number of cores to use for parallel processing. Defaults to #' the \code{mc.cores} option set in the R environment. #' @return A named list where each element contains a set of GDDs for a single diff --git a/tests/testthat/test_dhist.R b/tests/testthat/test_dhist.R index 37dc59b2..66ce6574 100644 --- a/tests/testthat/test_dhist.R +++ b/tests/testthat/test_dhist.R @@ -1,253 +1,301 @@ context("dhist: Discrete histogram from observations") -test_that("discrete_hist generates correct discrete histograms for random integer observations", { - # Method for generating random observations containing specific locations a - # specific number of times - random_observations <- function(locations, counts) { - # Construct vector containing each location replicated "count" times - observations <- purrr::simplify(purrr::map2(locations, counts, rep)) - # Randomise the order of the observations - sample(observations, size = length(observations), replace = FALSE) - } +test_that( + paste( + "discrete_hist generates correct discrete histograms for random integer", + "observations" + ), + { + # Method for generating random observations containing specific locations a + # specific number of times + random_observations <- function(locations, counts) { + # Construct vector containing each location replicated "count" times + observations <- purrr::simplify(purrr::map2(locations, counts, rep)) + # Randomise the order of the 
observations + sample(observations, size = length(observations), replace = FALSE) + } - set.seed(2684) - num_tests <- 100 - - run_test <- function() { - # Set parameters for generation of random observation sets - num_observations <- 100 - location_range <- -(num_observations * 3):(num_observations * 3) - # Do not allow zero counts as these locations will not be present in the - # observations generated from the locations and counts - count_range <- 1:10 - - # Generate random observation sets - locations <- sample(location_range, num_observations, replace = FALSE) - counts <- sample(count_range, num_observations, replace = TRUE) - - # Construct vector containing each location replicated "count" times - observations_orig <- purrr::simplify(purrr::map2(locations, counts, rep)) - # Randomise the order of the observations - observations <- sample(observations_orig, size = length(observations_orig), replace = FALSE) - - # Generate discrete histograms - hist <- dhist_from_obs(observations) - - # discrete_hist will drop bins with zero counts, so remove these from the - # expected data (not necessary now we've restricted counts to be >= 1, but - # the bug where we generated test locations with zero counts was so annoying - # to identify that we're going with a belt and braces approach) - non_zero_count_indexes <- counts != 0 - expected_locations <- locations[non_zero_count_indexes] - expected_counts <- counts[non_zero_count_indexes] - # dhist_from_obs will return results with bins ordered by ascending location, - # so sort expected data to match - sorted_locations <- sort(expected_locations, index.return = TRUE) - sorted_location_indexes <- sorted_locations$ix - expected_locations <- expected_locations[sorted_location_indexes] - expected_counts <- expected_counts[sorted_location_indexes] - - # Check that histogram locations and counts match those used to generate the - # observations - expect_true(all.equal(hist$locations, expected_locations)) - 
expect_true(all.equal(hist$masses, expected_counts)) - } + set.seed(2684) + num_tests <- 100 + + run_test <- function() { + # Set parameters for generation of random observation sets + num_observations <- 100 + location_range <- -(num_observations * 3):(num_observations * 3) + # Do not allow zero counts as these locations will not be present in the + # observations generated from the locations and counts + count_range <- 1:10 + + # Generate random observation sets + locations <- sample(location_range, num_observations, replace = FALSE) + counts <- sample(count_range, num_observations, replace = TRUE) + + # Construct vector containing each location replicated "count" times + observations_orig <- purrr::simplify(purrr::map2(locations, counts, rep)) + # Randomise the order of the observations + observations <- sample( + observations_orig, + size = length(observations_orig), + replace = FALSE + ) + + # Generate discrete histograms + hist <- dhist_from_obs(observations) + + # discrete_hist will drop bins with zero counts, so remove these from the + # expected data (not necessary now we've restricted counts to be >= 1, but + # the bug where we generated test locations with zero counts was so + # annoying to identify that we're going with a belt and braces approach) + non_zero_count_indexes <- counts != 0 + expected_locations <- locations[non_zero_count_indexes] + expected_counts <- counts[non_zero_count_indexes] + # dhist_from_obs will return results with bins ordered by ascending + # location, so sort expected data to match + sorted_locations <- sort(expected_locations, index.return = TRUE) + sorted_location_indexes <- sorted_locations$ix + expected_locations <- expected_locations[sorted_location_indexes] + expected_counts <- expected_counts[sorted_location_indexes] + + # Check that histogram locations and counts match those used to generate + # the observations + expect_true(all.equal(hist$locations, expected_locations)) + expect_true(all.equal(hist$masses, 
expected_counts)) + } - for (i in 1:num_tests) { - run_test() + for (i in 1:num_tests) { + run_test() + } } -}) - -context("dhist: constructor, equality operator and as_* transformation functions") -test_that("dhist constuctor has correct locations and masses (default smoothing, unsorted)", { - locations1 <- c(7, 42, 1, 21, 101, 9) - masses1 <- c(15, 12, 16, 13, 11, 14) - actual1 <- dhist(locations = locations1, masses = masses1, sorted = FALSE) - locations2 <- c(3, 0, -62, 7, 16, -58) - masses2 <- c(23, 24, 26, 22, 21, 25) - actual2 <- dhist(locations = locations2, masses = masses2, sorted = FALSE) - - expected_class <- "dhist" - expected_smoothing_window_width <- 0 - - expected1 <- list( - locations = locations1, masses = masses1, - smoothing_window_width = expected_smoothing_window_width - ) - class(expected1) <- expected_class - - expected2 <- list( - locations = locations2, masses = masses2, - smoothing_window_width = expected_smoothing_window_width - ) - class(expected2) <- expected_class - - expect_equal(actual1, expected1) - expect_equal(actual2, expected2) -}) - -test_that("dhist constuctor has correct locations and masses (default smoothing, sorted)", { - locations1 <- c(7, 42, 1, 21, 101, 9) - masses1 <- c(15, 12, 16, 13, 11, 14) - actual1 <- dhist(locations = locations1, masses = masses1, sorted = TRUE) - locations2 <- c(3, 0, -62, 7, 16, -58) - masses2 <- c(23, 24, 26, 22, 21, 25) - actual2 <- dhist(locations = locations2, masses = masses2, sorted = TRUE) - - expected_class <- "dhist" - expected_smoothing_window_width <- 0 - - expected1 <- list( - locations = c(1, 7, 9, 21, 42, 101), - masses = c(16, 15, 14, 13, 12, 11), - smoothing_window_width = expected_smoothing_window_width - ) - class(expected1) <- expected_class - - expected2 <- list( - locations = c(-62, -58, 0, 3, 7, 16), - masses = c(26, 25, 24, 23, 22, 21), - smoothing_window_width = expected_smoothing_window_width - ) - class(expected2) <- expected_class - - expect_equal(actual1, expected1) 
- expect_equal(actual2, expected2) -}) - -test_that("dhist constuctor has correct locations and masses (default smoothing, default sorting)", { - locations1 <- c(7, 42, 1, 21, 101, 9) - masses1 <- c(15, 12, 16, 13, 11, 14) - actual1 <- dhist(locations = locations1, masses = masses1) - locations2 <- c(3, 0, -62, 7, 16, -58) - masses2 <- c(23, 24, 26, 22, 21, 25) - actual2 <- dhist(locations = locations2, masses = masses2) - - expected_class <- "dhist" - expected_smoothing_window_width <- 0 +) + +context( + "dhist: constructor, equality operator and as_* transformation functions" +) +test_that( + paste( + "dhist constuctor has correct locations and masses (default smoothing,", + "unsorted)" + ), + { + locations1 <- c(7, 42, 1, 21, 101, 9) + masses1 <- c(15, 12, 16, 13, 11, 14) + actual1 <- dhist(locations = locations1, masses = masses1, sorted = FALSE) + locations2 <- c(3, 0, -62, 7, 16, -58) + masses2 <- c(23, 24, 26, 22, 21, 25) + actual2 <- dhist(locations = locations2, masses = masses2, sorted = FALSE) + + expected_class <- "dhist" + expected_smoothing_window_width <- 0 + + expected1 <- list( + locations = locations1, masses = masses1, + smoothing_window_width = expected_smoothing_window_width + ) + class(expected1) <- expected_class - expected1 <- list( - locations = c(1, 7, 9, 21, 42, 101), - masses = c(16, 15, 14, 13, 12, 11), - smoothing_window_width = expected_smoothing_window_width - ) - class(expected1) <- expected_class + expected2 <- list( + locations = locations2, masses = masses2, + smoothing_window_width = expected_smoothing_window_width + ) + class(expected2) <- expected_class - expected2 <- list( - locations = c(-62, -58, 0, 3, 7, 16), - masses = c(26, 25, 24, 23, 22, 21), - smoothing_window_width = expected_smoothing_window_width - ) - class(expected2) <- expected_class + expect_equal(actual1, expected1) + expect_equal(actual2, expected2) + } +) + +test_that( + paste( + "dhist constuctor has correct locations and masses (default smoothing,", + 
"sorted)" + ), + { + locations1 <- c(7, 42, 1, 21, 101, 9) + masses1 <- c(15, 12, 16, 13, 11, 14) + actual1 <- dhist(locations = locations1, masses = masses1, sorted = TRUE) + locations2 <- c(3, 0, -62, 7, 16, -58) + masses2 <- c(23, 24, 26, 22, 21, 25) + actual2 <- dhist(locations = locations2, masses = masses2, sorted = TRUE) + + expected_class <- "dhist" + expected_smoothing_window_width <- 0 + + expected1 <- list( + locations = c(1, 7, 9, 21, 42, 101), + masses = c(16, 15, 14, 13, 12, 11), + smoothing_window_width = expected_smoothing_window_width + ) + class(expected1) <- expected_class - expect_equal(actual1, expected1) - expect_equal(actual2, expected2) -}) + expected2 <- list( + locations = c(-62, -58, 0, 3, 7, 16), + masses = c(26, 25, 24, 23, 22, 21), + smoothing_window_width = expected_smoothing_window_width + ) + class(expected2) <- expected_class -test_that("dhist constuctor has correct locations and masses (specified smoothing, unsorted)", { - smoothing_window_width <- 1 + expect_equal(actual1, expected1) + expect_equal(actual2, expected2) + } +) + +test_that( + paste( + "dhist constuctor has correct locations and masses (default smoothing,", + "default sorting)" + ), + { + locations1 <- c(7, 42, 1, 21, 101, 9) + masses1 <- c(15, 12, 16, 13, 11, 14) + actual1 <- dhist(locations = locations1, masses = masses1) + locations2 <- c(3, 0, -62, 7, 16, -58) + masses2 <- c(23, 24, 26, 22, 21, 25) + actual2 <- dhist(locations = locations2, masses = masses2) + + expected_class <- "dhist" + expected_smoothing_window_width <- 0 + + expected1 <- list( + locations = c(1, 7, 9, 21, 42, 101), + masses = c(16, 15, 14, 13, 12, 11), + smoothing_window_width = expected_smoothing_window_width + ) + class(expected1) <- expected_class - locations1 <- c(7, 42, 1, 21, 101, 9) - masses1 <- c(15, 12, 16, 13, 11, 14) - actual1 <- dhist( - locations = locations1, masses = masses1, - smoothing_window_width = smoothing_window_width, sorted = FALSE - ) - locations2 <- c(3, 0, -62, 7, 
16, -58) - masses2 <- c(23, 24, 26, 22, 21, 25) - actual2 <- dhist( - locations = locations2, masses = masses2, - smoothing_window_width = smoothing_window_width, sorted = FALSE - ) + expected2 <- list( + locations = c(-62, -58, 0, 3, 7, 16), + masses = c(26, 25, 24, 23, 22, 21), + smoothing_window_width = expected_smoothing_window_width + ) + class(expected2) <- expected_class - expected_class <- "dhist" + expect_equal(actual1, expected1) + expect_equal(actual2, expected2) + } +) + +test_that( + paste( + "dhist constuctor has correct locations and masses (specified smoothing,", + "unsorted)" + ), + { + smoothing_window_width <- 1 - expected1 <- list( - locations = locations1, masses = masses1, - smoothing_window_width = smoothing_window_width - ) - class(expected1) <- expected_class + locations1 <- c(7, 42, 1, 21, 101, 9) + masses1 <- c(15, 12, 16, 13, 11, 14) + actual1 <- dhist( + locations = locations1, masses = masses1, + smoothing_window_width = smoothing_window_width, sorted = FALSE + ) + locations2 <- c(3, 0, -62, 7, 16, -58) + masses2 <- c(23, 24, 26, 22, 21, 25) + actual2 <- dhist( + locations = locations2, masses = masses2, + smoothing_window_width = smoothing_window_width, sorted = FALSE + ) - expected2 <- list( - locations = locations2, masses = masses2, - smoothing_window_width = smoothing_window_width - ) - class(expected2) <- expected_class + expected_class <- "dhist" - expect_equal(actual1, expected1) - expect_equal(actual2, expected2) -}) + expected1 <- list( + locations = locations1, masses = masses1, + smoothing_window_width = smoothing_window_width + ) + class(expected1) <- expected_class -test_that("dhist constuctor has correct locations and masses (specified smoothing, sorted)", { - smoothing_window_width <- 1 + expected2 <- list( + locations = locations2, masses = masses2, + smoothing_window_width = smoothing_window_width + ) + class(expected2) <- expected_class - locations1 <- c(7, 42, 1, 21, 101, 9) - masses1 <- c(15, 12, 16, 13, 11, 14) - 
actual1 <- dhist( - locations = locations1, masses = masses1, - smoothing_window_width = smoothing_window_width, sorted = TRUE - ) - locations2 <- c(3, 0, -62, 7, 16, -58) - masses2 <- c(23, 24, 26, 22, 21, 25) - actual2 <- dhist( - locations = locations2, masses = masses2, - smoothing_window_width = smoothing_window_width, sorted = TRUE - ) + expect_equal(actual1, expected1) + expect_equal(actual2, expected2) + } +) + +test_that( + paste( + "dhist constuctor has correct locations and masses (specified smoothing,", + "sorted)" + ), + { + smoothing_window_width <- 1 - expected_class <- "dhist" + locations1 <- c(7, 42, 1, 21, 101, 9) + masses1 <- c(15, 12, 16, 13, 11, 14) + actual1 <- dhist( + locations = locations1, masses = masses1, + smoothing_window_width = smoothing_window_width, sorted = TRUE + ) + locations2 <- c(3, 0, -62, 7, 16, -58) + masses2 <- c(23, 24, 26, 22, 21, 25) + actual2 <- dhist( + locations = locations2, masses = masses2, + smoothing_window_width = smoothing_window_width, sorted = TRUE + ) - expected1 <- list( - locations = c(1, 7, 9, 21, 42, 101), - masses = c(16, 15, 14, 13, 12, 11), - smoothing_window_width = smoothing_window_width - ) - class(expected1) <- expected_class + expected_class <- "dhist" - expected2 <- list( - locations = c(-62, -58, 0, 3, 7, 16), - masses = c(26, 25, 24, 23, 22, 21), - smoothing_window_width = smoothing_window_width - ) - class(expected2) <- expected_class + expected1 <- list( + locations = c(1, 7, 9, 21, 42, 101), + masses = c(16, 15, 14, 13, 12, 11), + smoothing_window_width = smoothing_window_width + ) + class(expected1) <- expected_class - expect_equal(actual1, expected1) - expect_equal(actual2, expected2) -}) + expected2 <- list( + locations = c(-62, -58, 0, 3, 7, 16), + masses = c(26, 25, 24, 23, 22, 21), + smoothing_window_width = smoothing_window_width + ) + class(expected2) <- expected_class -test_that("dhist constuctor has correct locations and masses (specified smoothing, default sorting)", { - 
smoothing_window_width <- 1 + expect_equal(actual1, expected1) + expect_equal(actual2, expected2) + } +) + +test_that( + paste( + "dhist constuctor has correct locations and masses (specified smoothing,", + "default sorting)" + ), + { + smoothing_window_width <- 1 - locations1 <- c(7, 42, 1, 21, 101, 9) - masses1 <- c(15, 12, 16, 13, 11, 14) - actual1 <- dhist( - locations = locations1, masses = masses1, - smoothing_window_width = smoothing_window_width - ) - locations2 <- c(3, 0, -62, 7, 16, -58) - masses2 <- c(23, 24, 26, 22, 21, 25) - actual2 <- dhist( - locations = locations2, masses = masses2, - smoothing_window_width = smoothing_window_width - ) + locations1 <- c(7, 42, 1, 21, 101, 9) + masses1 <- c(15, 12, 16, 13, 11, 14) + actual1 <- dhist( + locations = locations1, masses = masses1, + smoothing_window_width = smoothing_window_width + ) + locations2 <- c(3, 0, -62, 7, 16, -58) + masses2 <- c(23, 24, 26, 22, 21, 25) + actual2 <- dhist( + locations = locations2, masses = masses2, + smoothing_window_width = smoothing_window_width + ) - expected_class <- "dhist" + expected_class <- "dhist" - expected1 <- list( - locations = c(1, 7, 9, 21, 42, 101), - masses = c(16, 15, 14, 13, 12, 11), - smoothing_window_width = smoothing_window_width - ) - class(expected1) <- expected_class + expected1 <- list( + locations = c(1, 7, 9, 21, 42, 101), + masses = c(16, 15, 14, 13, 12, 11), + smoothing_window_width = smoothing_window_width + ) + class(expected1) <- expected_class - expected2 <- list( - locations = c(-62, -58, 0, 3, 7, 16), - masses = c(26, 25, 24, 23, 22, 21), - smoothing_window_width = smoothing_window_width - ) - class(expected2) <- expected_class + expected2 <- list( + locations = c(-62, -58, 0, 3, 7, 16), + masses = c(26, 25, 24, 23, 22, 21), + smoothing_window_width = smoothing_window_width + ) + class(expected2) <- expected_class - expect_equal(actual1, expected1) - expect_equal(actual2, expected2) -}) + expect_equal(actual1, expected1) + 
expect_equal(actual2, expected2) + } +) test_that("as_smoothed_dhist sets smoothing_window_width correctly", { dhist_pre <- dhist(locations <- c(7, 42, 1, 21, 101, 9), @@ -260,7 +308,10 @@ test_that("as_smoothed_dhist sets smoothing_window_width correctly", { dhist_pre$smoothing_window_width, expected_smoothing_window_width_pre ) - dhist_post <- as_smoothed_dhist(dhist_pre, expected_smoothing_window_width_post) + dhist_post <- as_smoothed_dhist( + dhist_pre, + expected_smoothing_window_width_post + ) expect_equal( dhist_post$smoothing_window_width, expected_smoothing_window_width_post @@ -279,7 +330,10 @@ test_that("as_unsmoothed_dhist sets smoothing_window_width correctly", { dhist_pre$smoothing_window_width, expected_smoothing_window_width_pre ) - dhist_post <- as_smoothed_dhist(dhist_pre, expected_smoothing_window_width_post) + dhist_post <- as_smoothed_dhist( + dhist_pre, + expected_smoothing_window_width_post + ) expect_equal( dhist_post$smoothing_window_width, expected_smoothing_window_width_post @@ -303,7 +357,8 @@ test_that("Non-identical dhists are NOT considered equal", { # Change a single element of the locations field dhist2_one_location_mismatch <- dhist1 - dhist2_one_location_mismatch$locations[3] <- dhist2_one_location_mismatch$locations[1] + 1 + dhist2_one_location_mismatch$locations[3] <- + dhist2_one_location_mismatch$locations[1] + 1 expect_false(dhist1 == dhist2_one_location_mismatch) # Change a single element of the masses field @@ -323,25 +378,37 @@ test_that("Non-identical dhists are NOT considered equal", { }) context("dhist: Discrete histogram variance") -test_that("dhist_variance difference for smoothed and unsmoothed dhists is smoothing_window_width^2 / 12", { - dhist <- dhist(locations <- c(7, 42, 1, 21, 101, 9), masses = c(15, 12, 16, 13, 11, 14)) - # Be careful: ensure that no smoothing window width results in overlapping bins - smoothing_window_width_A <- 1 - smoothing_window_width_B <- 2 - dhist_unsmoothed <- 
as_unsmoothed_dhist(dhist) - dhist_smoothed_A <- as_smoothed_dhist(dhist, smoothing_window_width_A) - dhist_smoothed_B <- as_smoothed_dhist(dhist, smoothing_window_width_B) - - var_unsmoothed <- dhist_variance(dhist_unsmoothed) - var_smoothed_A <- dhist_variance(dhist_smoothed_A) - var_smoothed_B <- dhist_variance(dhist_smoothed_B) - - expected_var_smoothed_A <- var_unsmoothed + ((smoothing_window_width_A^2) / 12) - expected_var_smoothed_B <- var_unsmoothed + ((smoothing_window_width_B^2) / 12) - - expect_equal(var_smoothed_A, expected_var_smoothed_A) - expect_equal(var_smoothed_B, expected_var_smoothed_B) -}) +test_that( + paste( + "dhist_variance difference for smoothed and unsmoothed dhists is", + "smoothing_window_width^2 / 12" + ), + { + dhist <- dhist( + locations <- c(7, 42, 1, 21, 101, 9), + masses = c(15, 12, 16, 13, 11, 14) + ) + # Be careful: ensure that no smoothing window width results in overlapping + # bins + smoothing_window_width_A <- 1 + smoothing_window_width_B <- 2 + dhist_unsmoothed <- as_unsmoothed_dhist(dhist) + dhist_smoothed_A <- as_smoothed_dhist(dhist, smoothing_window_width_A) + dhist_smoothed_B <- as_smoothed_dhist(dhist, smoothing_window_width_B) + + var_unsmoothed <- dhist_variance(dhist_unsmoothed) + var_smoothed_A <- dhist_variance(dhist_smoothed_A) + var_smoothed_B <- dhist_variance(dhist_smoothed_B) + + expected_var_smoothed_A <- var_unsmoothed + + ((smoothing_window_width_A^2) / 12) + expected_var_smoothed_B <- var_unsmoothed + + ((smoothing_window_width_B^2) / 12) + + expect_equal(var_smoothed_A, expected_var_smoothed_A) + expect_equal(var_smoothed_B, expected_var_smoothed_B) + } +) test_that("dhist_variance returns sigma^2 for unsmoothed normal histograms", { num_hists <- 5 @@ -410,110 +477,149 @@ test_that("normalise_dhist_mass output sums to 1", { }) context("dhist: Discrete histogram variance normalisation") -test_that("normalise_histogram_variance output has variance of 1 for random integer histograms", { - # Generate 
histograms with random masses and random centres - num_hists <- 10 - num_bins <- 70 - - mass_min <- 0 - mass_max <- 100 - rand_masses <- function() { - return(runif(num_bins, mass_min, mass_max)) - } +test_that( + paste( + "normalise_histogram_variance output has variance of 1 for random integer", + "histograms" + ), + { + # Generate histograms with random masses and random centres + num_hists <- 10 + num_bins <- 70 + + mass_min <- 0 + mass_max <- 100 + rand_masses <- function() { + return(runif(num_bins, mass_min, mass_max)) + } - centre_min <- -30 - centre_max <- 70 - rand_locations <- function() { - return(round(sample(centre_min:centre_max, num_bins), digits = 0)) - } + centre_min <- -30 + centre_max <- 70 + rand_locations <- function() { + return(round(sample(centre_min:centre_max, num_bins), digits = 0)) + } - rand_dhists <- replicate(num_hists, dhist(masses = rand_masses(), locations = rand_locations()), simplify = FALSE) + rand_dhists <- replicate( + num_hists, + dhist(masses = rand_masses(), locations = rand_locations()), + simplify = FALSE + ) - smoothing_window_width <- 1 - rand_dhists_unsmoothed <- purrr::map(rand_dhists, as_unsmoothed_dhist) - rand_dhists_smoothed <- purrr::map(rand_dhists, as_smoothed_dhist, smoothing_window_width = smoothing_window_width) + smoothing_window_width <- 1 + rand_dhists_unsmoothed <- purrr::map(rand_dhists, as_unsmoothed_dhist) + rand_dhists_smoothed <- purrr::map( + rand_dhists, + as_smoothed_dhist, + smoothing_window_width = smoothing_window_width + ) - expected_post_norm_smoothing_windows <- purrr::map_dbl(rand_dhists_smoothed, function(dhist) { - smoothing_window_width / dhist_std(dhist) - }) + expected_post_norm_smoothing_windows <- purrr::map_dbl( + rand_dhists_smoothed, + function(dhist) { + smoothing_window_width / dhist_std(dhist) + } + ) - actual_dhist_unsmoothed <- purrr::map(rand_dhists_unsmoothed, function(dhist) { - normalise_dhist_variance(dhist) - }) - actual_dhist_smoothed <- 
purrr::map(rand_dhists_smoothed, function(dhist) { - normalise_dhist_variance(dhist) - }) - expected_variance <- 1 - # Check variance of normalised hostograms is as expected - purrr::walk(actual_dhist_unsmoothed, function(dhist) { - expect_equal(dhist_variance(dhist), expected_variance) - }) - purrr::walk(actual_dhist_smoothed, function(dhist) { - expect_equal(dhist_variance(dhist), expected_variance) - }) - # Check smoothing window is as expected (0 for unsmoothe; smoothing_window_width/sigma for smoothed) - purrr::walk(actual_dhist_unsmoothed, function(dhist) { - expect_equal(dhist$smoothing_window_width, 0) - }) - purrr::walk2( - actual_dhist_smoothed, expected_post_norm_smoothing_windows, - function(dhist, sww) { - expect_equal(dhist$smoothing_window_width, sww) - } - ) - # Check masses unaltered - purrr::walk2( - actual_dhist_unsmoothed, rand_dhists_unsmoothed, - function(actual, expected) { - expect_equal(actual$masses, expected$masses) - } - ) - purrr::walk2( - actual_dhist_smoothed, rand_dhists_smoothed, - function(actual, expected) { - expect_equal(actual$masses, expected$masses) + actual_dhist_unsmoothed <- purrr::map( + rand_dhists_unsmoothed, + function(dhist) { + normalise_dhist_variance(dhist) + } + ) + actual_dhist_smoothed <- purrr::map(rand_dhists_smoothed, function(dhist) { + normalise_dhist_variance(dhist) + }) + expected_variance <- 1 + # Check variance of normalised hostograms is as expected + purrr::walk(actual_dhist_unsmoothed, function(dhist) { + expect_equal(dhist_variance(dhist), expected_variance) + }) + purrr::walk(actual_dhist_smoothed, function(dhist) { + expect_equal(dhist_variance(dhist), expected_variance) + }) + # Check smoothing window is as expected (0 for unsmoothe; smoothing_window + # width/sigma for smoothed) + purrr::walk(actual_dhist_unsmoothed, function(dhist) { + expect_equal(dhist$smoothing_window_width, 0) + }) + purrr::walk2( + actual_dhist_smoothed, expected_post_norm_smoothing_windows, + function(dhist, sww) { + 
expect_equal(dhist$smoothing_window_width, sww) + } + ) + # Check masses unaltered + purrr::walk2( + actual_dhist_unsmoothed, rand_dhists_unsmoothed, + function(actual, expected) { + expect_equal(actual$masses, expected$masses) + } + ) + purrr::walk2( + actual_dhist_smoothed, rand_dhists_smoothed, + function(actual, expected) { + expect_equal(actual$masses, expected$masses) + } + ) + } +) + +test_that( + paste( + "normalise_histogram_variance output has variance of 1 for normal", + "histograms" + ), + { + num_hists <- 5 + num_bins <- 100001 + + mus <- runif(num_hists, -10, 10) + sigmas <- runif(num_hists, 0, 10) + + rand_locations <- function(mu, sigma) { + return(seq(mu - 5 * sigma, mu + 5 * sigma, length.out = num_bins)) } - ) -}) -test_that("normalise_histogram_variance output has variance of 1 for normal histograms", { - num_hists <- 5 - num_bins <- 100001 - - mus <- runif(num_hists, -10, 10) - sigmas <- runif(num_hists, 0, 10) - - rand_locations <- function(mu, sigma) { - return(seq(mu - 5 * sigma, mu + 5 * sigma, length.out = num_bins)) + rand_dhists <- purrr::map2(mus, sigmas, function(mu, sigma) { + locations <- rand_locations(mu, sigma) + masses <- dnorm(locations, mean = mu, sd = sigma) + return(dhist(masses = masses, locations = locations)) + }) + + actuals <- purrr::map(rand_dhists, function(dhist) { + dhist_variance(normalise_dhist_variance(dhist)) + }) + expected <- 1 + purrr::map_dbl(actuals, function(actual) { + expect_equal(actual, expected) + }) } - - rand_dhists <- purrr::map2(mus, sigmas, function(mu, sigma) { - locations <- rand_locations(mu, sigma) - masses <- dnorm(locations, mean = mu, sd = sigma) - return(dhist(masses = masses, locations = locations)) - }) - - actuals <- purrr::map(rand_dhists, function(dhist) { - dhist_variance(normalise_dhist_variance(dhist)) - }) - expected <- 1 - purrr::map_dbl(actuals, function(actual) { - expect_equal(actual, expected) - }) -}) +) context("dhist: Sort dhist") test_that("sort_dhist works", { # NOTE: 
Need to construct dhist objects explicitly as the dhist constructor # now returns a sorted dhist and we want to be independent of this - dhist1 <- list(locations = c(7, 42, 1, 21, 101, 9), masses = c(15, 12, 16, 13, 11, 14)) + dhist1 <- list( + locations = c(7, 42, 1, 21, 101, 9), + masses = c(15, 12, 16, 13, 11, 14) + ) class(dhist1) <- "dhist" - dhist2 <- list(locations = c(3, 0, -62, 7, 16, -58), masses = c(23, 24, 26, 22, 21, 25)) + dhist2 <- list( + locations = c(3, 0, -62, 7, 16, -58), + masses = c(23, 24, 26, 22, 21, 25) + ) class(dhist2) <- "dhist" - expected1 <- list(locations = c(1, 7, 9, 21, 42, 101), masses = c(16, 15, 14, 13, 12, 11)) + expected1 <- list( + locations = c(1, 7, 9, 21, 42, 101), + masses = c(16, 15, 14, 13, 12, 11) + ) class(expected1) <- "dhist" - expected2 <- list(locations = c(-62, -58, 0, 3, 7, 16), masses = c(26, 25, 24, 23, 22, 21)) + expected2 <- list( + locations = c(-62, -58, 0, 3, 7, 16), + masses = c(26, 25, 24, 23, 22, 21) + ) class(expected2) <- "dhist" actual1 <- sort_dhist(dhist1) @@ -524,174 +630,269 @@ test_that("sort_dhist works", { }) context("dhist: ECMF") -test_that("dhist_ecmf returns correct step function when smoothing_window_width is zero", { - dhist1 <- dhist(locations = c(1, 2, 4, 7, 11, 16, 22), masses = c(21, 22, 23, 27, 31, 36, 42)) - dhist1_unsmoothed <- as_unsmoothed_dhist(dhist1) - - ecmf1 <- dhist_ecmf(dhist1) - actual_knots1 <- ecmf_knots(ecmf1) - actual_knots_ecds1 <- ecmf1(actual_knots1) - inter_knots_x <- head(actual_knots1, length(actual_knots1) - 1) - actual_inter_knots_ecds1 <- ecmf1(inter_knots_x) - extra_knots <- c(actual_knots1[1] - 1, actual_knots1[length(actual_knots1)] + 1) - actual_extra_knots_ecds1 <- ecmf1(extra_knots) - - cum_masses1 <- cumsum(dhist1$masses) - max_cum_mass <- cum_masses1[length(cum_masses1)] - expected_knots_ecds1 <- cum_masses1 - expected_inter_knots_ecds1 <- head(expected_knots_ecds1, length(expected_knots_ecds1) - 1) - expected_extra_knots_ecds1 <- c(0, max_cum_mass) 
- expected_knots1 <- dhist1$locations - - expect_equal(actual_knots1, expected_knots1) - expect_equal(actual_knots_ecds1, expected_knots_ecds1) - expect_equal(actual_inter_knots_ecds1, expected_inter_knots_ecds1) - expect_equal(actual_extra_knots_ecds1, expected_extra_knots_ecds1) -}) +test_that( + paste( + "dhist_ecmf returns correct step function when smoothing_window_width is", + "zero" + ), + { + dhist1 <- dhist( + locations = c(1, 2, 4, 7, 11, 16, 22), + masses = c(21, 22, 23, 27, 31, 36, 42) + ) + dhist1_unsmoothed <- as_unsmoothed_dhist(dhist1) + + ecmf1 <- dhist_ecmf(dhist1) + actual_knots1 <- ecmf_knots(ecmf1) + actual_knots_ecds1 <- ecmf1(actual_knots1) + inter_knots_x <- head(actual_knots1, length(actual_knots1) - 1) + actual_inter_knots_ecds1 <- ecmf1(inter_knots_x) + extra_knots <- c( + actual_knots1[1] - 1, + actual_knots1[length(actual_knots1)] + 1 + ) + actual_extra_knots_ecds1 <- ecmf1(extra_knots) + + cum_masses1 <- cumsum(dhist1$masses) + max_cum_mass <- cum_masses1[length(cum_masses1)] + expected_knots_ecds1 <- cum_masses1 + expected_inter_knots_ecds1 <- head( + expected_knots_ecds1, + length(expected_knots_ecds1) - 1 + ) + expected_extra_knots_ecds1 <- c(0, max_cum_mass) + expected_knots1 <- dhist1$locations + + expect_equal(actual_knots1, expected_knots1) + expect_equal(actual_knots_ecds1, expected_knots_ecds1) + expect_equal(actual_inter_knots_ecds1, expected_inter_knots_ecds1) + expect_equal(actual_extra_knots_ecds1, expected_extra_knots_ecds1) + } +) context("dhist: Area between ECMFs (simple integer dhists)") -test_that("area_between_dhist_ecmfs returns correct value for simple integer dhists", { - # Example dhists constructed by hand to result in lots of "bowtie" segments - # for smoothed ECMFs and to allow expected areas to be calculated by hand - # Unsmoothed locations are on an integer grid, smoothed bin edges are on a - # half-integer grid - # Smoothed and unsmoothed ECMF cumulative masses are on integer grid - # Smoothed ECMF 
crossing points are on a quarter-integer grid - dhistA <- dhist(locations = c(1, 3, 4), masses = c(2, 1, 1)) - dhistB <- dhist(locations = c(0, 2, 4, 5), masses = c(0.5, 2, 0.5, 1)) - - # Set up smoothed and unsmoothed versions of histograms - smoothing_window_width <- 1 - dhistA_unsmoothed <- as_unsmoothed_dhist(dhistA) - dhistB_unsmoothed <- as_unsmoothed_dhist(dhistB) - dhistA_smoothed <- as_smoothed_dhist(dhistA, smoothing_window_width) - dhistB_smoothed <- as_smoothed_dhist(dhistB, smoothing_window_width) - - # Set expected area - expected_area_unsmoothed <- 4 - expected_area_smoothed <- 3 - - # Generate ecmfs - ecmfA_unsmoothed <- dhist_ecmf(dhistA_unsmoothed) - ecmfB_unsmoothed <- dhist_ecmf(dhistB_unsmoothed) - ecmfA_smoothed <- dhist_ecmf(dhistA_smoothed) - ecmfB_smoothed <- dhist_ecmf(dhistB_smoothed) - - # Calculate area between ECMFs - actual_area_unsmoothed <- area_between_dhist_ecmfs(ecmfA_unsmoothed, ecmfB_unsmoothed) - actual_area_smoothed <- area_between_dhist_ecmfs(ecmfA_smoothed, ecmfB_smoothed) - - # Compare caculated areas with expected areas - expect_equal(actual_area_unsmoothed, expected_area_unsmoothed) - expect_equal(actual_area_smoothed, expected_area_smoothed) -}) +test_that( + paste( + "area_between_dhist_ecmfs returns correct value for simple integer dhists" + ), + { + # Example dhists constructed by hand to result in lots of "bowtie" segments + # for smoothed ECMFs and to allow expected areas to be calculated by hand + # Unsmoothed locations are on an integer grid, smoothed bin edges are on a + # half-integer grid + # Smoothed and unsmoothed ECMF cumulative masses are on integer grid + # Smoothed ECMF crossing points are on a quarter-integer grid + dhistA <- dhist(locations = c(1, 3, 4), masses = c(2, 1, 1)) + dhistB <- dhist(locations = c(0, 2, 4, 5), masses = c(0.5, 2, 0.5, 1)) + + # Set up smoothed and unsmoothed versions of histograms + smoothing_window_width <- 1 + dhistA_unsmoothed <- as_unsmoothed_dhist(dhistA) + 
dhistB_unsmoothed <- as_unsmoothed_dhist(dhistB) + dhistA_smoothed <- as_smoothed_dhist(dhistA, smoothing_window_width) + dhistB_smoothed <- as_smoothed_dhist(dhistB, smoothing_window_width) + + # Set expected area + expected_area_unsmoothed <- 4 + expected_area_smoothed <- 3 + + # Generate ecmfs + ecmfA_unsmoothed <- dhist_ecmf(dhistA_unsmoothed) + ecmfB_unsmoothed <- dhist_ecmf(dhistB_unsmoothed) + ecmfA_smoothed <- dhist_ecmf(dhistA_smoothed) + ecmfB_smoothed <- dhist_ecmf(dhistB_smoothed) + + # Calculate area between ECMFs + actual_area_unsmoothed <- area_between_dhist_ecmfs( + ecmfA_unsmoothed, + ecmfB_unsmoothed + ) + actual_area_smoothed <- area_between_dhist_ecmfs( + ecmfA_smoothed, + ecmfB_smoothed + ) -context("dhist: Area between ECMFs (non-integer normalised dhists)") -test_that("area_between_dhist_ecmfs returns correct value for non-integer normalised dhists", { + # Compare caculated areas with expected areas + expect_equal(actual_area_unsmoothed, expected_area_unsmoothed) + expect_equal(actual_area_smoothed, expected_area_smoothed) + } +) - # Previous simple integer grid where both histograms have been separately - # normalised to unit mass and variance. Has locations and masses at a range - # of floating point locations. Has bowties, triangles and trapeziums. - dhistA <- dhist(locations = c(1, 3, 4), masses = c(2, 1, 1)) - dhistB <- dhist(locations = c(0, 2, 4, 5), masses = c(0.5, 2, 0.5, 1)) +context("dhist: Area between ECMFs (non-integer normalised dhists)") +test_that( + paste( + "area_between_dhist_ecmfs returns correct value for non-integer normalised", + "dhists" + ), + { + + # Previous simple integer grid where both histograms have been separately + # normalised to unit mass and variance. Has locations and masses at a range + # of floating point locations. Has bowties, triangles and trapeziums. 
+ dhistA <- dhist(locations = c(1, 3, 4), masses = c(2, 1, 1)) + dhistB <- dhist(locations = c(0, 2, 4, 5), masses = c(0.5, 2, 0.5, 1)) + + dhistA <- normalise_dhist_mass(normalise_dhist_variance(dhistA)) + dhistB <- normalise_dhist_mass(normalise_dhist_variance(dhistB)) + + # Set up smoothed and unsmoothed versions of histograms + smoothing_window_width <- 1 + dhistA_unsmoothed <- as_unsmoothed_dhist(dhistA) + dhistB_unsmoothed <- as_unsmoothed_dhist(dhistB) + dhistA_smoothed <- as_smoothed_dhist(dhistA, smoothing_window_width) + dhistB_smoothed <- as_smoothed_dhist(dhistB, smoothing_window_width) + + # Generate ecmfs + ecmfA_unsmoothed <- dhist_ecmf(dhistA_unsmoothed) + ecmfB_unsmoothed <- dhist_ecmf(dhistB_unsmoothed) + ecmfA_smoothed <- dhist_ecmf(dhistA_smoothed) + ecmfB_smoothed <- dhist_ecmf(dhistB_smoothed) + + # Define some functions to make calculation of manually measured areas + # easier + rectangle_area <- function(width, height) { + return(width * height) + } + triangle_area <- function(base, height) { + return(0.5 * base * height) + } + trapezium_area <- function(side_a, side_b, height) { + return(0.5 * (side_a + side_b) * height) + } + # Measurements of expected area between ECMFs done by hand by printing + # normalised ECMFs on a grid with x-spacing of 0.02 and y-spacing of 0.01) + # Actual grid counts preserved in data to facilitate less tedious manual + # checking if required + # --- Unsmoothed --- + area_A_unsmoothed <- rectangle_area(width = 10 * 0.02, height = 12.5 * 0.01) + area_B_unsmoothed <- rectangle_area(width = 50.5 * 0.02, height = 37.5 * 0.01) + area_C_unsmoothed <- rectangle_area(width = 26 * 0.02, height = 12.5 * 0.01) + area_D_unsmoothed <- rectangle_area(width = 34.5 * 0.02, height = 12.5 * 0.01) + area_E_unsmoothed <- rectangle_area(width = 26.5 * 0.02, height = 25 * 0.01) + expected_area_unsmoothed <- + sum( + area_A_unsmoothed, area_B_unsmoothed, area_C_unsmoothed, + area_D_unsmoothed, area_E_unsmoothed + ) + # --- Smoothed --- 
+ area_A_smoothed <- triangle_area(base = 2.75 * 0.01, height = 6.5 * 0.02) + area_B_smoothed <- triangle_area(base = 2.75 * 0.01, height = 3 * 0.02) + area_C_smoothed <- triangle_area(base = 18.5 * 0.01, height = 21 * 0.02) + area_D_smoothed <- trapezium_area( + side_a = 18.5 * 0.01, + side_b = 37.5 * 0.01, + height = 14.5 * 0.02 + ) + area_E_smoothed <- trapezium_area( + side_a = 37.5 * 0.01, + side_b = 37.5 * 0.01, + height = 16 * 0.02 + ) + area_F_smoothed <- triangle_area(base = 37.5 * 0.01, height = 22.5 * 0.02) + area_G_smoothed <- triangle_area(base = 7.5 * 0.01, height = 8 * 0.02) + area_H_smoothed <- triangle_area(base = 7.5 * 0.01, height = 11 * 0.02) + area_I_smoothed <- triangle_area(base = 12.5 * 0.01, height = 19.5 * 0.02) + area_J_smoothed <- trapezium_area( + side_a = 12.5 * 0.01, + side_b = 20 * 0.01, + height = 30.5 * 0.02 + ) + area_K_smoothed <- trapezium_area( + side_a = 20 * 0.01, + side_b = 18 * 0.01, + height = 8 * 0.02 + ) + area_L_smoothed <- triangle_area(base = 18 * 0.01, height = 22 * 0.02) + expected_area_smoothed <- + sum( + area_A_smoothed, area_B_smoothed, area_C_smoothed, area_D_smoothed, + area_E_smoothed, area_F_smoothed, area_G_smoothed, area_H_smoothed, + area_I_smoothed, area_J_smoothed, area_K_smoothed, area_L_smoothed + ) + + # Calculate area between ECMFs + actual_area_unsmoothed <- area_between_dhist_ecmfs( + ecmfA_unsmoothed, + ecmfB_unsmoothed + ) + actual_area_smoothed <- area_between_dhist_ecmfs( + ecmfA_smoothed, + ecmfB_smoothed + ) - dhistA <- normalise_dhist_mass(normalise_dhist_variance(dhistA)) - dhistB <- normalise_dhist_mass(normalise_dhist_variance(dhistB)) + # Compare caculated areas with expected areas + expect_equalish_manual <- function(actual, expected, relative_tolerance) { + relative_diff <- abs(actual - expected) / expected + expect_lte(relative_diff, relative_tolerance) + } - # Set up smoothed and unsmoothed versions of histograms - smoothing_window_width <- 1 - dhistA_unsmoothed <- 
as_unsmoothed_dhist(dhistA) - dhistB_unsmoothed <- as_unsmoothed_dhist(dhistB) - dhistA_smoothed <- as_smoothed_dhist(dhistA, smoothing_window_width) - dhistB_smoothed <- as_smoothed_dhist(dhistB, smoothing_window_width) - - # Generate ecmfs - ecmfA_unsmoothed <- dhist_ecmf(dhistA_unsmoothed) - ecmfB_unsmoothed <- dhist_ecmf(dhistB_unsmoothed) - ecmfA_smoothed <- dhist_ecmf(dhistA_smoothed) - ecmfB_smoothed <- dhist_ecmf(dhistB_smoothed) - - # Define some functions to make calculation of manually measured areas easier - rectangle_area <- function(width, height) { - return(width * height) - } - triangle_area <- function(base, height) { - return(0.5 * base * height) - } - trapezium_area <- function(side_a, side_b, height) { - return(0.5 * (side_a + side_b) * height) + # Given manual measurement of areas between curves, consider area correct + # if actual and expected areas are within 1% of each other + expect_equalish_manual(actual_area_unsmoothed, expected_area_unsmoothed, 0.01) + expect_equalish_manual(actual_area_smoothed, expected_area_smoothed, 0.01) } - # Measurements of expected area between ECMFs done by hand by printing - # normalised ECMFs on a grid with x-spacing of 0.02 and y-spacing of 0.01) - # Actual grid counts preserved in data to facilitate less tedious manual - # checking if required - # --- Unsmoothed --- - area_A_unsmoothed <- rectangle_area(width = 10 * 0.02, height = 12.5 * 0.01) - area_B_unsmoothed <- rectangle_area(width = 50.5 * 0.02, height = 37.5 * 0.01) - area_C_unsmoothed <- rectangle_area(width = 26 * 0.02, height = 12.5 * 0.01) - area_D_unsmoothed <- rectangle_area(width = 34.5 * 0.02, height = 12.5 * 0.01) - area_E_unsmoothed <- rectangle_area(width = 26.5 * 0.02, height = 25 * 0.01) - expected_area_unsmoothed <- - sum( - area_A_unsmoothed, area_B_unsmoothed, area_C_unsmoothed, - area_D_unsmoothed, area_E_unsmoothed - ) - # --- Smoothed --- - area_A_smoothed <- triangle_area(base = 2.75 * 0.01, height = 6.5 * 0.02) - area_B_smoothed 
<- triangle_area(base = 2.75 * 0.01, height = 3 * 0.02) - area_C_smoothed <- triangle_area(base = 18.5 * 0.01, height = 21 * 0.02) - area_D_smoothed <- trapezium_area(side_a = 18.5 * 0.01, side_b = 37.5 * 0.01, height = 14.5 * 0.02) - area_E_smoothed <- trapezium_area(side_a = 37.5 * 0.01, side_b = 37.5 * 0.01, height = 16 * 0.02) - area_F_smoothed <- triangle_area(base = 37.5 * 0.01, height = 22.5 * 0.02) - area_G_smoothed <- triangle_area(base = 7.5 * 0.01, height = 8 * 0.02) - area_H_smoothed <- triangle_area(base = 7.5 * 0.01, height = 11 * 0.02) - area_I_smoothed <- triangle_area(base = 12.5 * 0.01, height = 19.5 * 0.02) - area_J_smoothed <- trapezium_area(side_a = 12.5 * 0.01, side_b = 20 * 0.01, height = 30.5 * 0.02) - area_K_smoothed <- trapezium_area(side_a = 20 * 0.01, side_b = 18 * 0.01, height = 8 * 0.02) - area_L_smoothed <- triangle_area(base = 18 * 0.01, height = 22 * 0.02) - expected_area_smoothed <- - sum( - area_A_smoothed, area_B_smoothed, area_C_smoothed, area_D_smoothed, - area_E_smoothed, area_F_smoothed, area_G_smoothed, area_H_smoothed, - area_I_smoothed, area_J_smoothed, area_K_smoothed, area_L_smoothed - ) - - # Calculate area between ECMFs - actual_area_unsmoothed <- area_between_dhist_ecmfs(ecmfA_unsmoothed, ecmfB_unsmoothed) - actual_area_smoothed <- area_between_dhist_ecmfs(ecmfA_smoothed, ecmfB_smoothed) - - # Compare caculated areas with expected areas - expect_equalish_manual <- function(actual, expected, relative_tolerance) { - relative_diff <- abs(actual - expected) / expected - expect_lte(relative_diff, relative_tolerance) - } - - # Given manual measurement of areas between curves, consider area correct - # if actual and expected areas are within 1% of each other - expect_equalish_manual(actual_area_unsmoothed, expected_area_unsmoothed, 0.01) - expect_equalish_manual(actual_area_smoothed, expected_area_smoothed, 0.01) -}) +) context("dhist: Harmonise dhist locations") test_that("harmonise_dhist_locations works A", { - dhist1 <- 
dhist(masses = c(11, 12, 13), locations = c(1, 3, 5), smoothing_window_width = 1, sorted = FALSE) - dhist2 <- dhist(masses = c(21, 22, 23), locations = c(2, 4, 6), smoothing_window_width = 1, sorted = FALSE) + dhist1 <- dhist( + masses = c(11, 12, 13), + locations = c(1, 3, 5), + smoothing_window_width = 1, + sorted = FALSE + ) + dhist2 <- dhist( + masses = c(21, 22, 23), + locations = c(2, 4, 6), + smoothing_window_width = 1, + sorted = FALSE + ) expected <- list( - dhist1 = dhist(masses = c(11, 12, 13, 0, 0, 0), locations = c(1, 3, 5, 2, 4, 6), smoothing_window_width = 1, sorted = FALSE), - dhist2 = dhist(masses = c(21, 22, 23, 0, 0, 0), locations = c(2, 4, 6, 1, 3, 5), smoothing_window_width = 1, sorted = FALSE) + dhist1 = dhist( + masses = c(11, 12, 13, 0, 0, 0), + locations = c(1, 3, 5, 2, 4, 6), + smoothing_window_width = 1, + sorted = FALSE + ), + dhist2 = dhist( + masses = c(21, 22, 23, 0, 0, 0), + locations = c(2, 4, 6, 1, 3, 5), + smoothing_window_width = 1, + sorted = FALSE + ) ) actual <- harmonise_dhist_locations(dhist1, dhist2) expect_equal(actual, expected) }) test_that("harmonise_dhist_locations works B", { - dhist1 <- dhist(masses = c(1, 1, 1), locations = c(1, 3, 5), smoothing_window_width = 1, sorted = FALSE) - dhist2 <- dhist(masses = c(1, 1, 1), locations = c(4, 5, 6), smoothing_window_width = 1, sorted = FALSE) + dhist1 <- dhist( + masses = c(1, 1, 1), + locations = c(1, 3, 5), + smoothing_window_width = 1, + sorted = FALSE + ) + dhist2 <- dhist( + masses = c(1, 1, 1), + locations = c(4, 5, 6), + smoothing_window_width = 1, + sorted = FALSE + ) expected <- list( - dhist1 = dhist(masses = c(1, 1, 1, 0, 0), locations = c(1, 3, 5, 4, 6), smoothing_window_width = 1, sorted = FALSE), - dhist2 = dhist(masses = c(1, 1, 1, 0, 0), locations = c(4, 5, 6, 1, 3), smoothing_window_width = 1, sorted = FALSE) + dhist1 = dhist( + masses = c(1, 1, 1, 0, 0), + locations = c(1, 3, 5, 4, 6), + smoothing_window_width = 1, + sorted = FALSE + ), + dhist2 = dhist( + 
masses = c(1, 1, 1, 0, 0), + locations = c(4, 5, 6, 1, 3), + smoothing_window_width = 1, + sorted = FALSE + ) ) actual <- harmonise_dhist_locations(dhist1, dhist2) diff --git a/tests/testthat/test_emd.R b/tests/testthat/test_emd.R index 9db131ee..3c782aec 100644 --- a/tests/testthat/test_emd.R +++ b/tests/testthat/test_emd.R @@ -1,14 +1,17 @@ context("EMD: Cost matrix") # COST_MATRIX: Property-based tests -test_that("cost_matrix returns all zeros when all bin locations are identical", { - bin_centres1 <- c(1, 1, 1, 1, 1, 1, 1) - bin_centres2 <- bin_centres1 - expected <- matrix(0, - nrow = length(bin_centres1), - ncol = length(bin_centres2) - ) - expect_equal(cost_matrix(bin_centres1, bin_centres2), expected) -}) +test_that( + "cost_matrix returns all zeros when all bin locations are identical", + { + bin_centres1 <- c(1, 1, 1, 1, 1, 1, 1) + bin_centres2 <- bin_centres1 + expected <- matrix(0, + nrow = length(bin_centres1), + ncol = length(bin_centres2) + ) + expect_equal(cost_matrix(bin_centres1, bin_centres2), expected) + } +) test_that("cost_matrix returns zeros along diagonal when both sets of bin locations are the same", { diff --git a/tests/testthat/test_graph_binning.R b/tests/testthat/test_graph_binning.R index b54d4796..f2bbf28d 100644 --- a/tests/testthat/test_graph_binning.R +++ b/tests/testthat/test_graph_binning.R @@ -1,29 +1,46 @@ context("Graph binning: Adaptive binning") -test_that("adaptive_breaks merges 2 lowest bins where only first bin is below minimum", { - min_count <- 5 - x <- c( - 1.5, rep(2.2, min_count), rep(3.5, min_count), rep(4.5, min_count), - rep(5.5, min_count), rep(6.5, min_count + 1) - ) - initial_breaks <- 1:7 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) - final_breaks_expected <- c(1, 3, 4, 5, 6, 7) - - expect_equal(final_breaks_actual, final_breaks_expected) -}) +test_that( + "adaptive_breaks merges 2 lowest bins where only first bin is below minimum", + { + min_count <- 5 + x <- c( 
+ 1.5, rep(2.2, min_count), rep(3.5, min_count), rep(4.5, min_count), + rep(5.5, min_count), rep(6.5, min_count + 1) + ) + initial_breaks <- 1:7 + final_breaks_actual <- adaptive_breaks( + x, + min_count = min_count, + breaks = initial_breaks + ) + final_breaks_expected <- c(1, 3, 4, 5, 6, 7) -test_that("adaptive_breaks merges 3 lowest bins where lowest 2 combined are below minimum", { - min_count <- 5 - x <- c( - 1.5, rep(2.2, 2), rep(3.5, min_count), rep(4.5, min_count), - rep(5.5, min_count), rep(6.5, min_count + 1) - ) - initial_breaks <- 1:7 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) - final_breaks_expected <- c(1, 4, 5, 6, 7) + expect_equal(final_breaks_actual, final_breaks_expected) + } +) + +test_that( + paste( + "adaptive_breaks merges 3 lowest bins where lowest 2 combined are below", + "minimum" + ), + { + min_count <- 5 + x <- c( + 1.5, rep(2.2, 2), rep(3.5, min_count), rep(4.5, min_count), + rep(5.5, min_count), rep(6.5, min_count + 1) + ) + initial_breaks <- 1:7 + final_breaks_actual <- adaptive_breaks( + x, + min_count = min_count, + breaks = initial_breaks + ) + final_breaks_expected <- c(1, 4, 5, 6, 7) - expect_equal(final_breaks_actual, final_breaks_expected) -}) + expect_equal(final_breaks_actual, final_breaks_expected) + } +) test_that("adaptive_breaks merges pair of bins in middle", { min_count <- 5 @@ -32,7 +49,11 @@ test_that("adaptive_breaks merges pair of bins in middle", { rep(5.5, min_count), rep(6.5, min_count + 1) ) initial_breaks <- 1:7 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) + final_breaks_actual <- adaptive_breaks( + x, + min_count = min_count, + breaks = initial_breaks + ) final_breaks_expected <- c(1, 2, 3, 5, 6, 7) expect_equal(final_breaks_actual, final_breaks_expected) @@ -45,57 +66,92 @@ test_that("adaptive_breaks merges two spearated pairs of bins in middle", { rep(5.5, 3), rep(6.5, 2), rep(7.8, min_count) ) initial_breaks <- 
1:8 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) + final_breaks_actual <- adaptive_breaks( + x, + min_count = min_count, + breaks = initial_breaks + ) final_breaks_expected <- c(1, 2, 4, 5, 7, 8) expect_equal(final_breaks_actual, final_breaks_expected) }) -test_that("adaptive_breaks merges 2 uppermost bins where both are below minimum", { - min_count <- 5 - x <- c( - rep(1.5, min_count), rep(2.2, min_count), rep(3.5, min_count), - rep(4.5, min_count), rep(5.5, 2), rep(6.5, 3) - ) - initial_breaks <- 1:7 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) - final_breaks_expected <- c(1, 2, 3, 4, 5, 7) - - expect_equal(final_breaks_actual, final_breaks_expected) -}) +test_that( + "adaptive_breaks merges 2 uppermost bins where both are below minimum", + { + min_count <- 5 + x <- c( + rep(1.5, min_count), rep(2.2, min_count), rep(3.5, min_count), + rep(4.5, min_count), rep(5.5, 2), rep(6.5, 3) + ) + initial_breaks <- 1:7 + final_breaks_actual <- adaptive_breaks( + x, + min_count = min_count, + breaks = initial_breaks + ) + final_breaks_expected <- c(1, 2, 3, 4, 5, 7) -test_that("adaptive_breaks merges 2 uppermost bins where only last bin is below minimum", { - min_count <- 5 - x <- c( - rep(1.5, min_count), rep(2.2, min_count), rep(3.5, min_count), - rep(4.5, min_count), rep(5.5, min_count), rep(6.5, 3) - ) - initial_breaks <- 1:7 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) - final_breaks_expected <- c(1, 2, 3, 4, 5, 7) + expect_equal(final_breaks_actual, final_breaks_expected) + } +) + +test_that( + paste( + "adaptive_breaks merges 2 uppermost bins where only last bin is below", + "minimum" + ), + { + min_count <- 5 + x <- c( + rep(1.5, min_count), rep(2.2, min_count), rep(3.5, min_count), + rep(4.5, min_count), rep(5.5, min_count), rep(6.5, 3) + ) + initial_breaks <- 1:7 + final_breaks_actual <- adaptive_breaks( + x, + min_count = 
min_count, + breaks = initial_breaks + ) + final_breaks_expected <- c(1, 2, 3, 4, 5, 7) - expect_equal(final_breaks_actual, final_breaks_expected) -}) + expect_equal(final_breaks_actual, final_breaks_expected) + } +) test_that("adaptive_breaks merges bins with no members with the next bin", { min_count <- 5 x <- c(rep(1.5, min_count), rep(5.5, min_count), rep(6.5, min_count)) initial_breaks <- 1:7 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) + final_breaks_actual <- adaptive_breaks( + x, + min_count = min_count, + breaks = initial_breaks + ) final_breaks_expected <- c(1, 2, 6, 7) expect_equal(final_breaks_actual, final_breaks_expected) }) -test_that("adaptive_breaks merges 2 bins below minimum, plus the empty bins between them", { - min_count <- 5 - x <- c(rep(1.5, min_count), rep(2.3, 1), rep(5.5, 4), rep(6.5, min_count)) - initial_breaks <- 1:7 - final_breaks_actual <- adaptive_breaks(x, min_count = min_count, breaks = initial_breaks) - final_breaks_expected <- c(1, 2, 6, 7) +test_that( + paste( + "adaptive_breaks merges 2 bins below minimum, plus the empty bins between", + "them" + ), + { + min_count <- 5 + x <- c(rep(1.5, min_count), rep(2.3, 1), rep(5.5, 4), rep(6.5, min_count)) + initial_breaks <- 1:7 + final_breaks_actual <- adaptive_breaks( + x, + min_count = min_count, + breaks = initial_breaks + ) + final_breaks_expected <- c(1, 2, 6, 7) - expect_equal(final_breaks_actual, final_breaks_expected) -}) + expect_equal(final_breaks_actual, final_breaks_expected) + } +) context("Graph binning: Adaptively binned densities") test_that("binned_densities_adaptive works", { @@ -129,7 +185,20 @@ test_that("binned_densities_adaptive works", { expected_interval_indexes = expected_interval_indexes ) # Test 2: - densities <- c(0, 0.012, 0.099, 0.201, 0.299, 0.402, 0.49, 0.596, 0.699, 0.803, 0.899, 1.0) + densities <- c( + 0, + 0.012, + 0.099, + 0.201, + 0.299, + 0.402, + 0.49, + 0.596, + 0.699, + 0.803, + 0.899, + 1.0 + ) 
min_counts_per_interval <- 2 num_intervals <- 100 expected_breaks <- c(0, 0.02, 0.21, 0.41, 0.6, 0.81, 1.0) diff --git a/tests/testthat/test_measures_net_dis.R b/tests/testthat/test_measures_net_dis.R index ac1a68c2..39cc1992 100644 --- a/tests/testthat/test_measures_net_dis.R +++ b/tests/testthat/test_measures_net_dis.R @@ -65,7 +65,8 @@ test_that(test_message, { # === TEST count_graphlet_tuples_ego === # NOTE: This test is not amazing, as graphlet_tuple_counts_ego is very similar # to the method under test. However, it's a simple method so maybe that's ok? - graphlet_tuple_counts_ego <- function(graphlet_counts_ego, max_graphlet_size) { + graphlet_tuple_counts_ego <- function(graphlet_counts_ego, + max_graphlet_size) { t(apply(graphlet_counts_ego, 1, count_graphlet_tuples, max_graphlet_size = max_graphlet_size @@ -81,53 +82,130 @@ test_that(test_message, { # Generate expected tuple counts for graphlets up to size 4 and 5 # 1. For ego-networks of order 1 - expected_tuple_count_n11_ego1_gs4 <- graphlet_tuple_counts_ego(graph_n11_ego1, 4) - expected_tuple_count_n37_ego1_gs4 <- graphlet_tuple_counts_ego(graph_n37_ego1, 4) - expected_tuple_count_n73_ego1_gs4 <- graphlet_tuple_counts_ego(graph_n73_ego1, 4) - expected_tuple_count_n11_ego1_gs5 <- graphlet_tuple_counts_ego(graph_n11_ego1, 5) - expected_tuple_count_n37_ego1_gs5 <- graphlet_tuple_counts_ego(graph_n37_ego1, 5) - expected_tuple_count_n73_ego1_gs5 <- graphlet_tuple_counts_ego(graph_n73_ego1, 5) + expected_tuple_count_n11_ego1_gs4 <- graphlet_tuple_counts_ego( + graph_n11_ego1, 4 + ) + expected_tuple_count_n37_ego1_gs4 <- graphlet_tuple_counts_ego( + graph_n37_ego1, 4 + ) + expected_tuple_count_n73_ego1_gs4 <- graphlet_tuple_counts_ego( + graph_n73_ego1, 4 + ) + expected_tuple_count_n11_ego1_gs5 <- graphlet_tuple_counts_ego( + graph_n11_ego1, 5 + ) + expected_tuple_count_n37_ego1_gs5 <- graphlet_tuple_counts_ego( + graph_n37_ego1, 5 + ) + expected_tuple_count_n73_ego1_gs5 <- graphlet_tuple_counts_ego( + 
graph_n73_ego1, 5 + ) # 2. For ego-networks of order 2 - expected_tuple_count_n11_ego2_gs4 <- graphlet_tuple_counts_ego(graph_n11_ego2, 4) - expected_tuple_count_n37_ego2_gs4 <- graphlet_tuple_counts_ego(graph_n37_ego2, 4) - expected_tuple_count_n73_ego2_gs4 <- graphlet_tuple_counts_ego(graph_n73_ego2, 4) - expected_tuple_count_n11_ego2_gs5 <- graphlet_tuple_counts_ego(graph_n11_ego2, 5) - expected_tuple_count_n37_ego2_gs5 <- graphlet_tuple_counts_ego(graph_n37_ego2, 5) - expected_tuple_count_n73_ego2_gs5 <- graphlet_tuple_counts_ego(graph_n73_ego2, 5) + expected_tuple_count_n11_ego2_gs4 <- graphlet_tuple_counts_ego( + graph_n11_ego2, 4 + ) + expected_tuple_count_n37_ego2_gs4 <- graphlet_tuple_counts_ego( + graph_n37_ego2, 4 + ) + expected_tuple_count_n73_ego2_gs4 <- graphlet_tuple_counts_ego( + graph_n73_ego2, 4 + ) + expected_tuple_count_n11_ego2_gs5 <- graphlet_tuple_counts_ego( + graph_n11_ego2, 5 + ) + expected_tuple_count_n37_ego2_gs5 <- graphlet_tuple_counts_ego( + graph_n37_ego2, 5 + ) + expected_tuple_count_n73_ego2_gs5 <- graphlet_tuple_counts_ego( + graph_n73_ego2, 5 + ) # Calculate actual tuple counts # 1. 
For ego-networks of order 1 - actual_tuple_count_n11_ego1_gs4 <- count_graphlet_tuples_ego(graph_n11_ego1, 4) - actual_tuple_count_n37_ego1_gs4 <- count_graphlet_tuples_ego(graph_n37_ego1, 4) - actual_tuple_count_n73_ego1_gs4 <- count_graphlet_tuples_ego(graph_n73_ego1, 4) - actual_tuple_count_n11_ego1_gs5 <- count_graphlet_tuples_ego(graph_n11_ego1, 5) - actual_tuple_count_n37_ego1_gs5 <- count_graphlet_tuples_ego(graph_n37_ego1, 5) - actual_tuple_count_n73_ego1_gs5 <- count_graphlet_tuples_ego(graph_n73_ego1, 5) + actual_tuple_count_n11_ego1_gs4 <- count_graphlet_tuples_ego( + graph_n11_ego1, 4 + ) + actual_tuple_count_n37_ego1_gs4 <- count_graphlet_tuples_ego( + graph_n37_ego1, 4 + ) + actual_tuple_count_n73_ego1_gs4 <- count_graphlet_tuples_ego( + graph_n73_ego1, 4 + ) + actual_tuple_count_n11_ego1_gs5 <- count_graphlet_tuples_ego( + graph_n11_ego1, 5 + ) + actual_tuple_count_n37_ego1_gs5 <- count_graphlet_tuples_ego( + graph_n37_ego1, 5 + ) + actual_tuple_count_n73_ego1_gs5 <- count_graphlet_tuples_ego( + graph_n73_ego1, 5 + ) # 2. 
For ego-networks of order 2 - actual_tuple_count_n11_ego2_gs4 <- count_graphlet_tuples_ego(graph_n11_ego2, 4) - actual_tuple_count_n37_ego2_gs4 <- count_graphlet_tuples_ego(graph_n37_ego2, 4) - actual_tuple_count_n73_ego2_gs4 <- count_graphlet_tuples_ego(graph_n73_ego2, 4) - actual_tuple_count_n11_ego2_gs5 <- count_graphlet_tuples_ego(graph_n11_ego2, 5) - actual_tuple_count_n37_ego2_gs5 <- count_graphlet_tuples_ego(graph_n37_ego2, 5) - actual_tuple_count_n73_ego2_gs5 <- count_graphlet_tuples_ego(graph_n73_ego2, 5) + actual_tuple_count_n11_ego2_gs4 <- count_graphlet_tuples_ego( + graph_n11_ego2, 4 + ) + actual_tuple_count_n37_ego2_gs4 <- count_graphlet_tuples_ego( + graph_n37_ego2, 4 + ) + actual_tuple_count_n73_ego2_gs4 <- count_graphlet_tuples_ego( + graph_n73_ego2, 4 + ) + actual_tuple_count_n11_ego2_gs5 <- count_graphlet_tuples_ego( + graph_n11_ego2, 5 + ) + actual_tuple_count_n37_ego2_gs5 <- count_graphlet_tuples_ego( + graph_n37_ego2, 5 + ) + actual_tuple_count_n73_ego2_gs5 <- count_graphlet_tuples_ego( + graph_n73_ego2, 5 + ) # Compare expected with actual - expect_equal(expected_tuple_count_n11_ego1_gs4, actual_tuple_count_n11_ego1_gs4) - expect_equal(expected_tuple_count_n37_ego1_gs4, actual_tuple_count_n37_ego1_gs4) - expect_equal(expected_tuple_count_n73_ego1_gs4, actual_tuple_count_n73_ego1_gs4) - expect_equal(expected_tuple_count_n11_ego1_gs5, actual_tuple_count_n11_ego1_gs5) - expect_equal(expected_tuple_count_n37_ego1_gs5, actual_tuple_count_n37_ego1_gs5) - expect_equal(expected_tuple_count_n73_ego1_gs5, actual_tuple_count_n73_ego1_gs5) + expect_equal( + expected_tuple_count_n11_ego1_gs4, actual_tuple_count_n11_ego1_gs4 + ) + expect_equal( + expected_tuple_count_n37_ego1_gs4, actual_tuple_count_n37_ego1_gs4 + ) + expect_equal( + expected_tuple_count_n73_ego1_gs4, actual_tuple_count_n73_ego1_gs4 + ) + expect_equal( + expected_tuple_count_n11_ego1_gs5, actual_tuple_count_n11_ego1_gs5 + ) + expect_equal( + expected_tuple_count_n37_ego1_gs5, 
actual_tuple_count_n37_ego1_gs5 + ) + expect_equal( + expected_tuple_count_n73_ego1_gs5, actual_tuple_count_n73_ego1_gs5 + ) # 2. For ego-networks of order 2 - expect_equal(expected_tuple_count_n11_ego2_gs4, actual_tuple_count_n11_ego2_gs4) - expect_equal(expected_tuple_count_n37_ego2_gs4, actual_tuple_count_n37_ego2_gs4) - expect_equal(expected_tuple_count_n73_ego2_gs4, actual_tuple_count_n73_ego2_gs4) - expect_equal(expected_tuple_count_n11_ego2_gs5, actual_tuple_count_n11_ego2_gs5) - expect_equal(expected_tuple_count_n37_ego2_gs5, actual_tuple_count_n37_ego2_gs5) - expect_equal(expected_tuple_count_n73_ego2_gs5, actual_tuple_count_n73_ego2_gs5) + expect_equal( + expected_tuple_count_n11_ego2_gs4, actual_tuple_count_n11_ego2_gs4 + ) + expect_equal( + expected_tuple_count_n37_ego2_gs4, actual_tuple_count_n37_ego2_gs4 + ) + expect_equal( + expected_tuple_count_n73_ego2_gs4, actual_tuple_count_n73_ego2_gs4 + ) + expect_equal( + expected_tuple_count_n11_ego2_gs5, actual_tuple_count_n11_ego2_gs5 + ) + expect_equal( + expected_tuple_count_n37_ego2_gs5, actual_tuple_count_n37_ego2_gs5 + ) + expect_equal( + expected_tuple_count_n73_ego2_gs5, actual_tuple_count_n73_ego2_gs5 + ) }) -context("Measures Netdis: Ego-network density values match those for manually verified networks") +context( + paste( + "Measures Netdis: Ego-network density values match those for manually", + "verified networks" + ) +) test_that("Ego-network 4-node density values match manually verified totals", { # Set up a small sample network with at least one ego-network that contains # at least one of each graphlets @@ -201,170 +279,188 @@ test_that("Ego-network 4-node density values match manually verified totals", { expect_equal(actual_densities_o2, expected_densities_o2) }) -context("Measures Netdis: Ego-network density-binned reference counts for manually verified networks") -test_that("Ego-network 4-node density-binned reference counts match manually verified totals", { - # Set up a small sample 
network with at least one ego-network that contains - # at least one of each graphlets - elist <- rbind( - c("n1", "n2"), - c("n2", "n3"), - c("n1", "n4"), - c("n2", "n5"), - c("n1", "n6"), - c("n1", "n7"), - c("n2", "n4"), - c("n4", "n6"), - c("n6", "n8"), - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - graph <- igraph::graph_from_edgelist(elist, directed = FALSE) - - # Set parameters for test - max_graphlet_size <- 4 - min_counts_per_interval <- 2 - num_intervals <- 100 - - # Set node and graphlet labels to use for row and col names in expected counts - node_labels <- igraph::V(graph)$name - graphlet_labels <- c("G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") - - # Set manually verified ego-network node counts and edge densities - # 1 . Ego-networks of order 1 - expected_node_counts_o1 <- c(5, 5, 2, 4, 2, 4, 5, 5, 4, 4) - expected_edge_counts_o1 <- c(6, 5, 1, 5, 1, 4, 7, 7, 6, 6) - max_edge_counts_o1 <- choose(expected_node_counts_o1, 2) - expected_densities_o1 <- c(expected_edge_counts_o1 / max_edge_counts_o1) - # Order 1 expected densities should be: - # 0.6, 0.5, 1.0, 0.83, 1.0, 0.67, 0.7, 0.7, 1.0, 1.0 - # 2. Ego-networks of order 2 - expected_node_counts_o2 <- c(10, 7, 5, 8, 5, 8, 8, 7, 6, 6) - expected_edge_counts_o2 <- c(15, 8, 5, 10, 5, 13, 13, 11, 9, 9) - max_edge_counts_o2 <- choose(expected_node_counts_o2, 2) - expected_densities_o2 <- c(expected_edge_counts_o2 / max_edge_counts_o2) - # Order 2 expected densities should be: - # 0.33, 0.38, 0.50, 0.36, 0.50, 0.46, 0.46, 0.52, 0.60, 0.60 - - # Set manually verified density bins for ego-networks - # 1. 
Ego-networks of order 1 - expected_breaks_o1 <- c(0.5, 0.605, 0.705, 1) - expected_interval_indexes_o1 <- c(1, 1, 3, 3, 3, 2, 2, 2, 3, 3) - expected_binned_densities_o1 <- list( - densities = expected_densities_o1, - interval_indexes = expected_interval_indexes_o1, - breaks = expected_breaks_o1 - ) - # Check binned densities are as expected - actual_binned_densities_o1 <- binned_densities_adaptive( - expected_densities_o1, - min_counts_per_interval = min_counts_per_interval, - num_intervals = num_intervals - ) - expect_equal(actual_binned_densities_o1, expected_binned_densities_o1) - # 2. Ego-networks of order 2 - expected_min_break_o2 <- 1 / 3 - expected_max_break_o2 <- 0.6 - expected_initial_interval_o2 <- - (expected_max_break_o2 - expected_min_break_o2) / (num_intervals) # 0.00266666667 - expected_breaks_o2 <- expected_min_break_o2 + (expected_initial_interval_o2 * c(0, 9, 50, 63, 100)) - expected_interval_indexes_o2 <- c(1, 2, 3, 1, 3, 2, 2, 4, 4, 4) - expected_binned_densities_o2 <- list( - densities = expected_densities_o2, - interval_indexes = expected_interval_indexes_o2, - breaks = expected_breaks_o2 - ) - # Check binned densities are as expected - actual_binned_densities_o2 <- binned_densities_adaptive( - expected_densities_o2, - min_counts_per_interval = min_counts_per_interval, - num_intervals = num_intervals - ) - expect_equal(actual_binned_densities_o2, expected_binned_densities_o2) - - # Set manually verified scaled ego-network graphlet counts - graphlet_key <- graphlet_key(max_graphlet_size) - k <- graphlet_key$node_count - # 1-step ego networks - expected_counts_o1 <- rbind( - c(6, 5, 2, 0, 1, 0, 2, 1, 0) / zeros_to_ones(choose(5, k)), - c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), - c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), - c(5, 2, 2, 0, 0, 0, 0, 1, 0) / zeros_to_ones(choose(4, k)), - c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), - c(4, 2, 1, 0, 0, 0, 1, 0, 0) / zeros_to_ones(choose(4, k)), - c(7, 
3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), - c(7, 3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), - c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)), - c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)) - ) - rownames(expected_counts_o1) <- node_labels - colnames(expected_counts_o1) <- graphlet_labels - # 2-step ego networks - expected_counts_o2 <- rbind( - c(15, 18, 6, 21, 3, 1, 11, 1, 1) / zeros_to_ones(choose(10, k)), - c(8, 10, 2, 6, 3, 0, 4, 1, 0) / zeros_to_ones(choose(7, k)), - c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), - c(10, 14, 2, 11, 3, 1, 5, 1, 0) / zeros_to_ones(choose(8, k)), - c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), - c(13, 13, 6, 15, 1, 1, 9, 1, 1) / zeros_to_ones(choose(8, k)), - c(13, 13, 6, 15, 1, 1, 9, 1, 1) / zeros_to_ones(choose(8, k)), - c(11, 10, 5, 10, 0, 1, 8, 0, 1) / zeros_to_ones(choose(7, k)), - c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)), - c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)) - ) - rownames(expected_counts_o2) <- node_labels - colnames(expected_counts_o2) <- graphlet_labels - - # Calculate binned average expected counts based on manually verified counts - # and density bins - # Order 1: Expected interval indexes: 1, 1, 3, 3, 3, 2, 2, 2, 3, 3 - mean_counts_bin1_o1 <- (expected_counts_o1[1, ] + expected_counts_o1[2, ]) / 2 - mean_counts_bin2_o1 <- (expected_counts_o1[6, ] + expected_counts_o1[7, ] + - expected_counts_o1[8, ]) / 3 - mean_counts_bin3_o1 <- (expected_counts_o1[3, ] + expected_counts_o1[4, ] + - expected_counts_o1[5, ] + expected_counts_o1[9, ] + - expected_counts_o1[10, ]) / 5 - expected_mean_density_binned_counts_o1 <- rbind( - mean_counts_bin1_o1, mean_counts_bin2_o1, mean_counts_bin3_o1 - ) - rownames(expected_mean_density_binned_counts_o1) <- 1:3 - # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 - mean_counts_bin1_o2 <- (expected_counts_o2[1, ] + expected_counts_o2[4, ]) / 2 - 
mean_counts_bin2_o2 <- (expected_counts_o2[2, ] + expected_counts_o2[6, ] + - expected_counts_o2[7, ]) / 3 - mean_counts_bin3_o2 <- (expected_counts_o2[3, ] + expected_counts_o2[5, ]) / 2 - mean_counts_bin4_o2 <- (expected_counts_o2[8, ] + expected_counts_o2[9, ] + - expected_counts_o2[10, ]) / 3 - expected_mean_density_binned_counts_o2 <- rbind( - mean_counts_bin1_o2, mean_counts_bin2_o2, mean_counts_bin3_o2, - mean_counts_bin4_o2 - ) - rownames(expected_mean_density_binned_counts_o2) <- 1:4 +context( + paste( + "Measures Netdis: Ego-network density-binned reference counts for manually", + "verified networks" + ) +) +test_that( + paste( + "Ego-network 4-node density-binned reference counts match manually", + "verified totals" + ), + { + # Set up a small sample network with at least one ego-network that contains + # at least one of each graphlets + elist <- rbind( + c("n1", "n2"), + c("n2", "n3"), + c("n1", "n4"), + c("n2", "n5"), + c("n1", "n6"), + c("n1", "n7"), + c("n2", "n4"), + c("n4", "n6"), + c("n6", "n8"), + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + graph <- igraph::graph_from_edgelist(elist, directed = FALSE) + + # Set parameters for test + max_graphlet_size <- 4 + min_counts_per_interval <- 2 + num_intervals <- 100 + + # Set node and graphlet labels to use for row and col names in expected + # counts + node_labels <- igraph::V(graph)$name + graphlet_labels <- c("G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") + + # Set manually verified ego-network node counts and edge densities + # 1 . Ego-networks of order 1 + expected_node_counts_o1 <- c(5, 5, 2, 4, 2, 4, 5, 5, 4, 4) + expected_edge_counts_o1 <- c(6, 5, 1, 5, 1, 4, 7, 7, 6, 6) + max_edge_counts_o1 <- choose(expected_node_counts_o1, 2) + expected_densities_o1 <- c(expected_edge_counts_o1 / max_edge_counts_o1) + # Order 1 expected densities should be: + # 0.6, 0.5, 1.0, 0.83, 1.0, 0.67, 0.7, 0.7, 1.0, 1.0 + # 2. 
Ego-networks of order 2 + expected_node_counts_o2 <- c(10, 7, 5, 8, 5, 8, 8, 7, 6, 6) + expected_edge_counts_o2 <- c(15, 8, 5, 10, 5, 13, 13, 11, 9, 9) + max_edge_counts_o2 <- choose(expected_node_counts_o2, 2) + expected_densities_o2 <- c(expected_edge_counts_o2 / max_edge_counts_o2) + # Order 2 expected densities should be: + # 0.33, 0.38, 0.50, 0.36, 0.50, 0.46, 0.46, 0.52, 0.60, 0.60 + + # Set manually verified density bins for ego-networks + # 1. Ego-networks of order 1 + expected_breaks_o1 <- c(0.5, 0.605, 0.705, 1) + expected_interval_indexes_o1 <- c(1, 1, 3, 3, 3, 2, 2, 2, 3, 3) + expected_binned_densities_o1 <- list( + densities = expected_densities_o1, + interval_indexes = expected_interval_indexes_o1, + breaks = expected_breaks_o1 + ) + # Check binned densities are as expected + actual_binned_densities_o1 <- binned_densities_adaptive( + expected_densities_o1, + min_counts_per_interval = min_counts_per_interval, + num_intervals = num_intervals + ) + expect_equal(actual_binned_densities_o1, expected_binned_densities_o1) + # 2. 
Ego-networks of order 2 + expected_min_break_o2 <- 1 / 3 + expected_max_break_o2 <- 0.6 + expected_initial_interval_o2 <- + (expected_max_break_o2 - expected_min_break_o2) / (num_intervals) + expected_breaks_o2 <- expected_min_break_o2 + + (expected_initial_interval_o2 * c(0, 9, 50, 63, 100)) + expected_interval_indexes_o2 <- c(1, 2, 3, 1, 3, 2, 2, 4, 4, 4) + expected_binned_densities_o2 <- list( + densities = expected_densities_o2, + interval_indexes = expected_interval_indexes_o2, + breaks = expected_breaks_o2 + ) + # Check binned densities are as expected + actual_binned_densities_o2 <- binned_densities_adaptive( + expected_densities_o2, + min_counts_per_interval = min_counts_per_interval, + num_intervals = num_intervals + ) + expect_equal(actual_binned_densities_o2, expected_binned_densities_o2) + + # Set manually verified scaled ego-network graphlet counts + graphlet_key <- graphlet_key(max_graphlet_size) + k <- graphlet_key$node_count + # 1-step ego networks + expected_counts_o1 <- rbind( + c(6, 5, 2, 0, 1, 0, 2, 1, 0) / zeros_to_ones(choose(5, k)), + c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), + c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), + c(5, 2, 2, 0, 0, 0, 0, 1, 0) / zeros_to_ones(choose(4, k)), + c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), + c(4, 2, 1, 0, 0, 0, 1, 0, 0) / zeros_to_ones(choose(4, k)), + c(7, 3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), + c(7, 3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), + c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)), + c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)) + ) + rownames(expected_counts_o1) <- node_labels + colnames(expected_counts_o1) <- graphlet_labels + # 2-step ego networks + expected_counts_o2 <- rbind( + c(15, 18, 6, 21, 3, 1, 11, 1, 1) / zeros_to_ones(choose(10, k)), + c(8, 10, 2, 6, 3, 0, 4, 1, 0) / zeros_to_ones(choose(7, k)), + c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), + c(10, 14, 2, 11, 3, 
1, 5, 1, 0) / zeros_to_ones(choose(8, k)), + c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), + c(13, 13, 6, 15, 1, 1, 9, 1, 1) / zeros_to_ones(choose(8, k)), + c(13, 13, 6, 15, 1, 1, 9, 1, 1) / zeros_to_ones(choose(8, k)), + c(11, 10, 5, 10, 0, 1, 8, 0, 1) / zeros_to_ones(choose(7, k)), + c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)), + c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)) + ) + rownames(expected_counts_o2) <- node_labels + colnames(expected_counts_o2) <- graphlet_labels + + # Calculate binned average expected counts based on manually verified counts + # and density bins + # Order 1: Expected interval indexes: 1, 1, 3, 3, 3, 2, 2, 2, 3, 3 + mean_counts_bin1_o1 <- (expected_counts_o1[1, ] + + expected_counts_o1[2, ]) / 2 + mean_counts_bin2_o1 <- (expected_counts_o1[6, ] + expected_counts_o1[7, ] + + expected_counts_o1[8, ]) / 3 + mean_counts_bin3_o1 <- (expected_counts_o1[3, ] + expected_counts_o1[4, ] + + expected_counts_o1[5, ] + expected_counts_o1[9, ] + + expected_counts_o1[10, ]) / 5 + expected_mean_density_binned_counts_o1 <- rbind( + mean_counts_bin1_o1, mean_counts_bin2_o1, mean_counts_bin3_o1 + ) + rownames(expected_mean_density_binned_counts_o1) <- 1:3 + # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 + mean_counts_bin1_o2 <- (expected_counts_o2[1, ] + + expected_counts_o2[4, ]) / 2 + mean_counts_bin2_o2 <- (expected_counts_o2[2, ] + expected_counts_o2[6, ] + + expected_counts_o2[7, ]) / 3 + mean_counts_bin3_o2 <- (expected_counts_o2[3, ] + + expected_counts_o2[5, ]) / 2 + mean_counts_bin4_o2 <- (expected_counts_o2[8, ] + + expected_counts_o2[9, ] + expected_counts_o2[10, ]) / 3 + expected_mean_density_binned_counts_o2 <- rbind( + mean_counts_bin1_o2, mean_counts_bin2_o2, mean_counts_bin3_o2, + mean_counts_bin4_o2 + ) + rownames(expected_mean_density_binned_counts_o2) <- 1:4 - # Calculate actual output of function under test - actual_mean_density_binned_counts_o1 <- 
mean_density_binned_graphlet_counts( - expected_counts_o1, expected_interval_indexes_o1 - ) - actual_mean_density_binned_counts_o2 <- mean_density_binned_graphlet_counts( - expected_counts_o2, expected_interval_indexes_o2 - ) + # Calculate actual output of function under test + actual_mean_density_binned_counts_o1 <- mean_density_binned_graphlet_counts( + expected_counts_o1, expected_interval_indexes_o1 + ) + actual_mean_density_binned_counts_o2 <- mean_density_binned_graphlet_counts( + expected_counts_o2, expected_interval_indexes_o2 + ) - # Check actual output vs expected - expect_equal( - actual_mean_density_binned_counts_o1, - expected_mean_density_binned_counts_o1 - ) - expect_equal( - actual_mean_density_binned_counts_o2, - expected_mean_density_binned_counts_o2 - ) -}) + # Check actual output vs expected + expect_equal( + actual_mean_density_binned_counts_o1, + expected_mean_density_binned_counts_o1 + ) + expect_equal( + actual_mean_density_binned_counts_o2, + expected_mean_density_binned_counts_o2 + ) + } +) -context("Measures Netdis: scale_graphlet_counts_ego for manually verified networks") +context( + "Measures Netdis: scale_graphlet_counts_ego for manually verified networks" +) test_that("Ego-network 4-node graphlet counts match manually verified totals", { # Set up a small sample network with at least one ego-network that contains # at least one of each graphlets @@ -391,13 +487,14 @@ test_that("Ego-network 4-node graphlet counts match manually verified totals", { node_labels <- igraph::V(graph)$name graphlet_labels <- c("G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") - # Count graphlets in each ego network of the graph with neighbourhood sizes of 1 and 2 + # Count graphlets in each ego network of the graph with neighbourhood sizes of + # 1 and 2 max_graphlet_size <- 4 min_ego_edges <- 0 min_ego_nodes <- 0 - # Use previously tested functions to generate ego networks and calcualte graphlet - # counts. 
+ # Use previously tested functions to generate ego networks and calcualte + # graphlet counts. # ego nets ego_networks_o1 <- make_named_ego_graph(graph, order = 1, @@ -471,293 +568,328 @@ test_that("Ego-network 4-node graphlet counts match manually verified totals", { expect_equal(actual_counts_o2, expected_counts_o2) }) -context("Measures Netdis: Ego-network density-binned counts for manually verified networks") -test_that("density_binned_counts output matches manually verified totals with different scaling and aggregation functions", { - # Set up a small sample network with at least one ego-network that contains - # at least one of each graphlets - elist <- rbind( - c("n1", "n2"), - c("n2", "n3"), - c("n1", "n4"), - c("n2", "n5"), - c("n1", "n6"), - c("n1", "n7"), - c("n2", "n4"), - c("n4", "n6"), - c("n6", "n8"), - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - graph <- igraph::graph_from_edgelist(elist, directed = FALSE) +context( + paste( + "Measures Netdis: Ego-network density-binned counts for manually", + "verified networks" + ) +) +test_that( + paste( + "density_binned_counts output matches manually verified totals with", + "different scaling and aggregation functions" + ), + { + # Set up a small sample network with at least one ego-network that contains + # at least one of each graphlets + elist <- rbind( + c("n1", "n2"), + c("n2", "n3"), + c("n1", "n4"), + c("n2", "n5"), + c("n1", "n6"), + c("n1", "n7"), + c("n2", "n4"), + c("n4", "n6"), + c("n6", "n8"), + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + graph <- igraph::graph_from_edgelist(elist, directed = FALSE) + + # Set parameters for test + max_graphlet_size <- 4 + min_counts_per_interval <- 2 + num_intervals <- 100 + min_ego_edges <- 0 + min_ego_nodes <- 0 + + # Set node and graphlet labels to use for row and col names in + # expected counts + node_labels <- 
igraph::V(graph)$name + graphlet_labels <- c("G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") + + # Set manually verified ego-network node counts and edge densities + # 1 . Ego-networks of order 1 + expected_node_counts_o1 <- c(5, 5, 2, 4, 2, 4, 5, 5, 4, 4) + expected_edge_counts_o1 <- c(6, 5, 1, 5, 1, 4, 7, 7, 6, 6) + max_edge_counts_o1 <- choose(expected_node_counts_o1, 2) + expected_densities_o1 <- c(expected_edge_counts_o1 / max_edge_counts_o1) + # Order 1 expected densities should be: + # 0.6, 0.5, 1.0, 0.83, 1.0, 0.67, 0.7, 0.7, 1.0, 1.0 + # 2. Ego-networks of order 2 + expected_node_counts_o2 <- c(10, 7, 5, 8, 5, 8, 8, 7, 6, 6) + expected_edge_counts_o2 <- c(15, 8, 5, 10, 5, 13, 13, 11, 9, 9) + max_edge_counts_o2 <- choose(expected_node_counts_o2, 2) + expected_densities_o2 <- c(expected_edge_counts_o2 / max_edge_counts_o2) + # Order 2 expected densities should be: + # 0.33, 0.38, 0.50, 0.36, 0.50, 0.46, 0.46, 0.52, 0.60, 0.60 + + # Set manually verified density bins for ego-networks + # 1. Ego-networks of order 1 + expected_breaks_o1 <- c(0.5, 0.605, 0.705, 1) + expected_interval_indexes_o1 <- c(1, 1, 3, 3, 3, 2, 2, 2, 3, 3) + expected_binned_densities_o1 <- list( + densities = expected_densities_o1, + interval_indexes = expected_interval_indexes_o1, + breaks = expected_breaks_o1 + ) + # 2. 
Ego-networks of order 2 + expected_min_break_o2 <- 1 / 3 + expected_max_break_o2 <- 0.6 + expected_initial_interval_o2 <- + (expected_max_break_o2 - expected_min_break_o2) / (num_intervals) + expected_breaks_o2 <- expected_min_break_o2 + + (expected_initial_interval_o2 * c(0, 9, 50, 63, 100)) + expected_interval_indexes_o2 <- c(1, 2, 3, 1, 3, 2, 2, 4, 4, 4) + expected_binned_densities_o2 <- list( + densities = expected_densities_o2, + interval_indexes = expected_interval_indexes_o2, + breaks = expected_breaks_o2 + ) - # Set parameters for test - max_graphlet_size <- 4 - min_counts_per_interval <- 2 - num_intervals <- 100 - min_ego_edges <- 0 - min_ego_nodes <- 0 + # Set manually verified scaled ego-network graphlet counts + graphlet_key <- graphlet_key(max_graphlet_size) + k <- graphlet_key$node_count + # 1-step ego networks + expected_counts_o1 <- rbind( + c(6, 5, 2, 0, 1, 0, 2, 1, 0) / zeros_to_ones(choose(5, k)), + c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), + c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), + c(5, 2, 2, 0, 0, 0, 0, 1, 0) / zeros_to_ones(choose(4, k)), + c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), + c(4, 2, 1, 0, 0, 0, 1, 0, 0) / zeros_to_ones(choose(4, k)), + c(7, 3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), + c(7, 3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), + c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)), + c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)) + ) + rownames(expected_counts_o1) <- node_labels + colnames(expected_counts_o1) <- graphlet_labels + # 2-step ego networks + expected_counts_o2 <- rbind( + c(15, 18, 6, 21, 3, 1, 11, 1, 1) / zeros_to_ones(choose(10, k)), + c(8, 10, 2, 6, 3, 0, 4, 1, 0) / zeros_to_ones(choose(7, k)), + c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), + c(10, 14, 2, 11, 3, 1, 5, 1, 0) / zeros_to_ones(choose(8, k)), + c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), + c(13, 13, 6, 15, 1, 1, 9, 1, 1) / 
zeros_to_ones(choose(8, k)), + c(13, 13, 6, 15, 1, 1, 9, 1, 1) / zeros_to_ones(choose(8, k)), + c(11, 10, 5, 10, 0, 1, 8, 0, 1) / zeros_to_ones(choose(7, k)), + c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)), + c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)) + ) + rownames(expected_counts_o2) <- node_labels + colnames(expected_counts_o2) <- graphlet_labels + + # Calculate binned average expected counts based on manually verified counts + # and density bins + # Order 1: Expected interval indexes: 1, 1, 3, 3, 3, 2, 2, 2, 3, 3 + mean_counts_bin1_o1 <- (expected_counts_o1[1, ] + + expected_counts_o1[2, ]) / 2 + mean_counts_bin2_o1 <- (expected_counts_o1[6, ] + expected_counts_o1[7, ] + + expected_counts_o1[8, ]) / 3 + mean_counts_bin3_o1 <- (expected_counts_o1[3, ] + expected_counts_o1[4, ] + + expected_counts_o1[5, ] + expected_counts_o1[9, ] + + expected_counts_o1[10, ]) / 5 + expected_mean_density_binned_counts_o1 <- rbind( + mean_counts_bin1_o1, mean_counts_bin2_o1, mean_counts_bin3_o1 + ) + rownames(expected_mean_density_binned_counts_o1) <- 1:3 + # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 + mean_counts_bin1_o2 <- (expected_counts_o2[1, ] + + expected_counts_o2[4, ]) / 2 + mean_counts_bin2_o2 <- (expected_counts_o2[2, ] + expected_counts_o2[6, ] + + expected_counts_o2[7, ]) / 3 + mean_counts_bin3_o2 <- (expected_counts_o2[3, ] + + expected_counts_o2[5, ]) / 2 + mean_counts_bin4_o2 <- (expected_counts_o2[8, ] + expected_counts_o2[9, ] + + expected_counts_o2[10, ]) / 3 + expected_mean_density_binned_counts_o2 <- rbind( + mean_counts_bin1_o2, mean_counts_bin2_o2, mean_counts_bin3_o2, + mean_counts_bin4_o2 + ) + rownames(expected_mean_density_binned_counts_o2) <- 1:4 - # Set node and graphlet labels to use for row and col names in expected counts - node_labels <- igraph::V(graph)$name - graphlet_labels <- c("G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") + # density_binned_counts with default arguments should give + 
# mean graphlet count in each density bin + actual_density_binned_counts_o1 <- density_binned_counts( + expected_counts_o1, + expected_interval_indexes_o1 + ) - # Set manually verified ego-network node counts and edge densities - # 1 . Ego-networks of order 1 - expected_node_counts_o1 <- c(5, 5, 2, 4, 2, 4, 5, 5, 4, 4) - expected_edge_counts_o1 <- c(6, 5, 1, 5, 1, 4, 7, 7, 6, 6) - max_edge_counts_o1 <- choose(expected_node_counts_o1, 2) - expected_densities_o1 <- c(expected_edge_counts_o1 / max_edge_counts_o1) - # Order 1 expected densities should be: - # 0.6, 0.5, 1.0, 0.83, 1.0, 0.67, 0.7, 0.7, 1.0, 1.0 - # 2. Ego-networks of order 2 - expected_node_counts_o2 <- c(10, 7, 5, 8, 5, 8, 8, 7, 6, 6) - expected_edge_counts_o2 <- c(15, 8, 5, 10, 5, 13, 13, 11, 9, 9) - max_edge_counts_o2 <- choose(expected_node_counts_o2, 2) - expected_densities_o2 <- c(expected_edge_counts_o2 / max_edge_counts_o2) - # Order 2 expected densities should be: - # 0.33, 0.38, 0.50, 0.36, 0.50, 0.46, 0.46, 0.52, 0.60, 0.60 + actual_density_binned_counts_o2 <- density_binned_counts( + expected_counts_o2, + expected_interval_indexes_o2 + ) - # Set manually verified density bins for ego-networks - # 1. Ego-networks of order 1 - expected_breaks_o1 <- c(0.5, 0.605, 0.705, 1) - expected_interval_indexes_o1 <- c(1, 1, 3, 3, 3, 2, 2, 2, 3, 3) - expected_binned_densities_o1 <- list( - densities = expected_densities_o1, - interval_indexes = expected_interval_indexes_o1, - breaks = expected_breaks_o1 - ) - # 2. 
Ego-networks of order 2 - expected_min_break_o2 <- 1 / 3 - expected_max_break_o2 <- 0.6 - expected_initial_interval_o2 <- - (expected_max_break_o2 - expected_min_break_o2) / (num_intervals) # 0.00266666667 - expected_breaks_o2 <- expected_min_break_o2 + (expected_initial_interval_o2 * c(0, 9, 50, 63, 100)) - expected_interval_indexes_o2 <- c(1, 2, 3, 1, 3, 2, 2, 4, 4, 4) - expected_binned_densities_o2 <- list( - densities = expected_densities_o2, - interval_indexes = expected_interval_indexes_o2, - breaks = expected_breaks_o2 - ) - - # Set manually verified scaled ego-network graphlet counts - graphlet_key <- graphlet_key(max_graphlet_size) - k <- graphlet_key$node_count - # 1-step ego networks - expected_counts_o1 <- rbind( - c(6, 5, 2, 0, 1, 0, 2, 1, 0) / zeros_to_ones(choose(5, k)), - c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), - c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), - c(5, 2, 2, 0, 0, 0, 0, 1, 0) / zeros_to_ones(choose(4, k)), - c(1, 0, 0, 0, 0, 0, 0, 0, 0) / zeros_to_ones(choose(2, k)), - c(4, 2, 1, 0, 0, 0, 1, 0, 0) / zeros_to_ones(choose(4, k)), - c(7, 3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), - c(7, 3, 4, 0, 0, 0, 3, 0, 1) / zeros_to_ones(choose(5, k)), - c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)), - c(6, 0, 4, 0, 0, 0, 0, 0, 1) / zeros_to_ones(choose(4, k)) - ) - rownames(expected_counts_o1) <- node_labels - colnames(expected_counts_o1) <- graphlet_labels - # 2-step ego networks - expected_counts_o2 <- rbind( - c(15, 18, 6, 21, 3, 1, 11, 1, 1) / zeros_to_ones(choose(10, k)), - c(8, 10, 2, 6, 3, 0, 4, 1, 0) / zeros_to_ones(choose(7, k)), - c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), - c(10, 14, 2, 11, 3, 1, 5, 1, 0) / zeros_to_ones(choose(8, k)), - c(5, 5, 1, 0, 2, 0, 2, 0, 0) / zeros_to_ones(choose(5, k)), - c(13, 13, 6, 15, 1, 1, 9, 1, 1) / zeros_to_ones(choose(8, k)), - c(13, 13, 6, 15, 1, 1, 9, 1, 1) / zeros_to_ones(choose(8, k)), - c(11, 10, 5, 10, 0, 1, 8, 0, 1) / 
zeros_to_ones(choose(7, k)), - c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)), - c(9, 8, 4, 4, 0, 1, 6, 0, 1) / zeros_to_ones(choose(6, k)) - ) - rownames(expected_counts_o2) <- node_labels - colnames(expected_counts_o2) <- graphlet_labels + # Check actual output vs expected + expect_equal( + actual_density_binned_counts_o1, + expected_mean_density_binned_counts_o1 + ) + expect_equal( + actual_density_binned_counts_o2, + expected_mean_density_binned_counts_o2 + ) - # Calculate binned average expected counts based on manually verified counts - # and density bins - # Order 1: Expected interval indexes: 1, 1, 3, 3, 3, 2, 2, 2, 3, 3 - mean_counts_bin1_o1 <- (expected_counts_o1[1, ] + expected_counts_o1[2, ]) / 2 - mean_counts_bin2_o1 <- (expected_counts_o1[6, ] + expected_counts_o1[7, ] + - expected_counts_o1[8, ]) / 3 - mean_counts_bin3_o1 <- (expected_counts_o1[3, ] + expected_counts_o1[4, ] + - expected_counts_o1[5, ] + expected_counts_o1[9, ] + - expected_counts_o1[10, ]) / 5 - expected_mean_density_binned_counts_o1 <- rbind( - mean_counts_bin1_o1, mean_counts_bin2_o1, mean_counts_bin3_o1 - ) - rownames(expected_mean_density_binned_counts_o1) <- 1:3 - # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 - mean_counts_bin1_o2 <- (expected_counts_o2[1, ] + expected_counts_o2[4, ]) / 2 - mean_counts_bin2_o2 <- (expected_counts_o2[2, ] + expected_counts_o2[6, ] + - expected_counts_o2[7, ]) / 3 - mean_counts_bin3_o2 <- (expected_counts_o2[3, ] + expected_counts_o2[5, ]) / 2 - mean_counts_bin4_o2 <- (expected_counts_o2[8, ] + expected_counts_o2[9, ] + - expected_counts_o2[10, ]) / 3 - expected_mean_density_binned_counts_o2 <- rbind( - mean_counts_bin1_o2, mean_counts_bin2_o2, mean_counts_bin3_o2, - mean_counts_bin4_o2 - ) - rownames(expected_mean_density_binned_counts_o2) <- 1:4 - - # density_binned_counts with default arguments should give - # mean graphlet count in each density bin - actual_density_binned_counts_o1 <- density_binned_counts( - 
expected_counts_o1, - expected_interval_indexes_o1 - ) - - actual_density_binned_counts_o2 <- density_binned_counts( - expected_counts_o2, - expected_interval_indexes_o2 - ) - - # Check actual output vs expected - expect_equal( - actual_density_binned_counts_o1, - expected_mean_density_binned_counts_o1 - ) - expect_equal( - actual_density_binned_counts_o2, - expected_mean_density_binned_counts_o2 - ) - - # Calculate max binned counts based on manually verified counts - # and density bins - # Order 1: Expected interval indexes: 1, 1, 3, 3, 3, 2, 2, 2, 3, 3 - # apply(x, 2, max): returns max of each column in x - max_counts_bin1_o1 <- apply(rbind(expected_counts_o1[1, ], expected_counts_o1[2, ]), 2, max) - max_counts_bin2_o1 <- apply(rbind( - expected_counts_o1[6, ], expected_counts_o1[7, ], - expected_counts_o1[8, ] - ), 2, max) - max_counts_bin3_o1 <- apply(rbind( - expected_counts_o1[3, ], expected_counts_o1[4, ], - expected_counts_o1[5, ], expected_counts_o1[9, ], - expected_counts_o1[10, ] - ), 2, max) - - expected_max_density_binned_counts_o1 <- rbind( - max_counts_bin1_o1, max_counts_bin2_o1, max_counts_bin3_o1 - ) - rownames(expected_max_density_binned_counts_o1) <- 1:3 - # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 - max_counts_bin1_o2 <- apply(rbind(expected_counts_o2[1, ], expected_counts_o2[4, ]), 2, max) - max_counts_bin2_o2 <- apply(rbind( - expected_counts_o2[2, ], expected_counts_o2[6, ], - expected_counts_o2[7, ] - ), 2, max) - max_counts_bin3_o2 <- apply(rbind(expected_counts_o2[3, ], expected_counts_o2[5, ]), 2, max) - max_counts_bin4_o2 <- apply(rbind( - expected_counts_o2[8, ], expected_counts_o2[9, ], - expected_counts_o2[10, ] - ), 2, max) - - expected_max_density_binned_counts_o2 <- rbind( - max_counts_bin1_o2, max_counts_bin2_o2, max_counts_bin3_o2, - max_counts_bin4_o2 - ) - rownames(expected_max_density_binned_counts_o2) <- 1:4 - - # density_binned_counts with agg_fn = max should give - # max graphlet count in each 
density bin - agg_fn <- max - scale_fn <- NULL - - actual_max_density_binned_counts_o1 <- density_binned_counts( - expected_counts_o1, - expected_interval_indexes_o1, - agg_fn = agg_fn, - scale_fn = scale_fn - ) - - actual_max_density_binned_counts_o2 <- density_binned_counts( - expected_counts_o2, - expected_interval_indexes_o2, - agg_fn = agg_fn, - scale_fn = scale_fn - ) - - # Check actual output vs expected - expect_equal( - actual_max_density_binned_counts_o1, - expected_max_density_binned_counts_o1 - ) - expect_equal( - actual_max_density_binned_counts_o2, - expected_max_density_binned_counts_o2 - ) + # Calculate max binned counts based on manually verified counts + # and density bins + # Order 1: Expected interval indexes: 1, 1, 3, 3, 3, 2, 2, 2, 3, 3 + # apply(x, 2, max): returns max of each column in x + max_counts_bin1_o1 <- apply( + rbind(expected_counts_o1[1, ], expected_counts_o1[2, ]), 2, max + ) + max_counts_bin2_o1 <- apply(rbind( + expected_counts_o1[6, ], expected_counts_o1[7, ], + expected_counts_o1[8, ] + ), 2, max) + max_counts_bin3_o1 <- apply(rbind( + expected_counts_o1[3, ], expected_counts_o1[4, ], + expected_counts_o1[5, ], expected_counts_o1[9, ], + expected_counts_o1[10, ] + ), 2, max) + + expected_max_density_binned_counts_o1 <- rbind( + max_counts_bin1_o1, max_counts_bin2_o1, max_counts_bin3_o1 + ) + rownames(expected_max_density_binned_counts_o1) <- 1:3 + # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 + max_counts_bin1_o2 <- apply( + rbind(expected_counts_o2[1, ], expected_counts_o2[4, ]), 2, max + ) + max_counts_bin2_o2 <- apply(rbind( + expected_counts_o2[2, ], expected_counts_o2[6, ], + expected_counts_o2[7, ] + ), 2, max) + max_counts_bin3_o2 <- apply( + rbind(expected_counts_o2[3, ], expected_counts_o2[5, ]), 2, max + ) + max_counts_bin4_o2 <- apply(rbind( + expected_counts_o2[8, ], expected_counts_o2[9, ], + expected_counts_o2[10, ] + ), 2, max) + + expected_max_density_binned_counts_o2 <- rbind( + 
max_counts_bin1_o2, max_counts_bin2_o2, max_counts_bin3_o2, + max_counts_bin4_o2 + ) + rownames(expected_max_density_binned_counts_o2) <- 1:4 + + # density_binned_counts with agg_fn = max should give + # max graphlet count in each density bin + agg_fn <- max + scale_fn <- NULL + + actual_max_density_binned_counts_o1 <- density_binned_counts( + expected_counts_o1, + expected_interval_indexes_o1, + agg_fn = agg_fn, + scale_fn = scale_fn + ) - # density_binned_counts with scale_fn = scale_graphlet_counts_ego - # should give mean graphlet counts in each density bin scaled by - # count_graphlet_tuples. - agg_fn <- mean - scale_fn <- scale_graphlet_counts_ego + actual_max_density_binned_counts_o2 <- density_binned_counts( + expected_counts_o2, + expected_interval_indexes_o2, + agg_fn = agg_fn, + scale_fn = scale_fn + ) - # calculate expected counts using previously tested function - expected_scaled_counts_o1 <- - scale_graphlet_counts_ego(expected_counts_o1, - max_graphlet_size = max_graphlet_size + # Check actual output vs expected + expect_equal( + actual_max_density_binned_counts_o1, + expected_max_density_binned_counts_o1 ) - expected_scaled_counts_o2 <- - scale_graphlet_counts_ego(expected_counts_o2, - max_graphlet_size = max_graphlet_size + expect_equal( + actual_max_density_binned_counts_o2, + expected_max_density_binned_counts_o2 ) - # calculate mean expected counts using expected density bins - mean_scaled_counts_bin1_o1 <- (expected_scaled_counts_o1[1, ] + expected_scaled_counts_o1[2, ]) / 2 - mean_scaled_counts_bin2_o1 <- (expected_scaled_counts_o1[6, ] + expected_scaled_counts_o1[7, ] + - expected_scaled_counts_o1[8, ]) / 3 - mean_scaled_counts_bin3_o1 <- (expected_scaled_counts_o1[3, ] + expected_scaled_counts_o1[4, ] + - expected_scaled_counts_o1[5, ] + expected_scaled_counts_o1[9, ] + - expected_scaled_counts_o1[10, ]) / 5 - expected_scaled_density_binned_counts_o1 <- rbind( - mean_scaled_counts_bin1_o1, mean_scaled_counts_bin2_o1, 
mean_scaled_counts_bin3_o1 - ) - rownames(expected_scaled_density_binned_counts_o1) <- 1:3 - # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 - mean_scaled_counts_bin1_o2 <- (expected_scaled_counts_o2[1, ] + expected_scaled_counts_o2[4, ]) / 2 - mean_scaled_counts_bin2_o2 <- (expected_scaled_counts_o2[2, ] + expected_scaled_counts_o2[6, ] + - expected_scaled_counts_o2[7, ]) / 3 - mean_scaled_counts_bin3_o2 <- (expected_scaled_counts_o2[3, ] + expected_scaled_counts_o2[5, ]) / 2 - mean_scaled_counts_bin4_o2 <- (expected_scaled_counts_o2[8, ] + expected_scaled_counts_o2[9, ] + - expected_scaled_counts_o2[10, ]) / 3 - expected_scaled_density_binned_counts_o2 <- rbind( - mean_scaled_counts_bin1_o2, mean_scaled_counts_bin2_o2, mean_scaled_counts_bin3_o2, - mean_scaled_counts_bin4_o2 - ) - rownames(expected_scaled_density_binned_counts_o2) <- 1:4 - - # Calculate scaled binned counts with density_binned_counts (function to test) - actual_scaled_density_binned_counts_o1 <- density_binned_counts( - expected_counts_o1, - expected_interval_indexes_o1, - agg_fn = agg_fn, - scale_fn = scale_fn, - max_graphlet_size = max_graphlet_size - ) + # density_binned_counts with scale_fn = scale_graphlet_counts_ego + # should give mean graphlet counts in each density bin scaled by + # count_graphlet_tuples. 
+ agg_fn <- mean + scale_fn <- scale_graphlet_counts_ego + + # calculate expected counts using previously tested function + expected_scaled_counts_o1 <- + scale_graphlet_counts_ego(expected_counts_o1, + max_graphlet_size = max_graphlet_size + ) + expected_scaled_counts_o2 <- + scale_graphlet_counts_ego(expected_counts_o2, + max_graphlet_size = max_graphlet_size + ) + + # calculate mean expected counts using expected density bins + mean_scaled_counts_bin1_o1 <- (expected_scaled_counts_o1[1, ] + + expected_scaled_counts_o1[2, ]) / 2 + mean_scaled_counts_bin2_o1 <- (expected_scaled_counts_o1[6, ] + + expected_scaled_counts_o1[7, ] + + expected_scaled_counts_o1[8, ]) / 3 + mean_scaled_counts_bin3_o1 <- (expected_scaled_counts_o1[3, ] + + expected_scaled_counts_o1[4, ] + + expected_scaled_counts_o1[5, ] + + expected_scaled_counts_o1[9, ] + + expected_scaled_counts_o1[10, ]) / 5 + expected_scaled_density_binned_counts_o1 <- rbind( + mean_scaled_counts_bin1_o1, + mean_scaled_counts_bin2_o1, + mean_scaled_counts_bin3_o1 + ) + rownames(expected_scaled_density_binned_counts_o1) <- 1:3 + # Order 2: Expected interval indexes: 1, 3, 3, 1, 3, 2, 2, 4, 4, 4 + mean_scaled_counts_bin1_o2 <- (expected_scaled_counts_o2[1, ] + + expected_scaled_counts_o2[4, ]) / 2 + mean_scaled_counts_bin2_o2 <- (expected_scaled_counts_o2[2, ] + + expected_scaled_counts_o2[6, ] + + expected_scaled_counts_o2[7, ]) / 3 + mean_scaled_counts_bin3_o2 <- (expected_scaled_counts_o2[3, ] + + expected_scaled_counts_o2[5, ]) / 2 + mean_scaled_counts_bin4_o2 <- (expected_scaled_counts_o2[8, ] + + expected_scaled_counts_o2[9, ] + + expected_scaled_counts_o2[10, ]) / 3 + expected_scaled_density_binned_counts_o2 <- rbind( + mean_scaled_counts_bin1_o2, + mean_scaled_counts_bin2_o2, + mean_scaled_counts_bin3_o2, + mean_scaled_counts_bin4_o2 + ) + rownames(expected_scaled_density_binned_counts_o2) <- 1:4 + + # Calculate scaled binned counts with density_binned_counts (function to + # test) + 
actual_scaled_density_binned_counts_o1 <- density_binned_counts( + expected_counts_o1, + expected_interval_indexes_o1, + agg_fn = agg_fn, + scale_fn = scale_fn, + max_graphlet_size = max_graphlet_size + ) - actual_scaled_density_binned_counts_o2 <- density_binned_counts( - expected_counts_o2, - expected_interval_indexes_o2, - agg_fn = agg_fn, - scale_fn = scale_fn, - max_graphlet_size = max_graphlet_size - ) + actual_scaled_density_binned_counts_o2 <- density_binned_counts( + expected_counts_o2, + expected_interval_indexes_o2, + agg_fn = agg_fn, + scale_fn = scale_fn, + max_graphlet_size = max_graphlet_size + ) - # Check actual output vs expected - expect_equal( - actual_scaled_density_binned_counts_o1, - expected_scaled_density_binned_counts_o1 - ) - expect_equal( - actual_scaled_density_binned_counts_o2, - expected_scaled_density_binned_counts_o2 - ) -}) + # Check actual output vs expected + expect_equal( + actual_scaled_density_binned_counts_o1, + expected_scaled_density_binned_counts_o1 + ) + expect_equal( + actual_scaled_density_binned_counts_o2, + expected_scaled_density_binned_counts_o2 + ) + } +) context("Measures Netdis: Expected graphlet counts") test_that("netdis_expected_counts_ego works for graphlets up to 4 nodes", { @@ -834,7 +966,11 @@ test_that("netdis_expected_counts_ego works for graphlets up to 4 nodes", { } # Determine expected and actual expected graphlet counts expected_expected_graphlet_counts <- - purrr::map2(density_indexes, num_nodes, expected_expected_graphlet_counts_fn) + purrr::map2( + density_indexes, + num_nodes, + expected_expected_graphlet_counts_fn + ) actual_expected_graphlet_counts <- purrr::map(graphlet_counts, netdis_expected_counts_ego, max_graphlet_size = max_graphlet_size, @@ -899,13 +1035,17 @@ test_that("netdis_expected_counts works for graphlets up to 4 nodes", { num_edges_o1 <- c(6, 5, 1, 5, 1, 4, 7, 7, 6, 6) max_edges_o1 <- choose(num_nodes_o1, 2) densities_o1 <- num_edges_o1 / max_edges_o1 - # Order 1 densities should 
be: 0.6000000 0.5000000 1.0000000 0.8333333 1.0000000 0.6666667 0.7000000 0.7000000 1.0000000 1.0000000 + # Order 1 densities should be: + # 0.6000000 0.5000000 1.0000000 0.8333333 1.0000000 + # 0.6666667 0.7000000 0.7000000 1.0000000 1.0000000 # 2. Ego-networks of order 2 num_nodes_o2 <- c(10, 7, 5, 8, 5, 8, 8, 7, 6, 6) num_edges_o2 <- c(15, 8, 5, 10, 5, 13, 13, 11, 9, 9) max_edges_o2 <- choose(num_nodes_o2, 2) densities_o2 <- num_edges_o2 / max_edges_o2 - # Order 2 densities should be: 0.3333333 0.3809524 0.5000000 0.3571429 0.5000000 0.4642857 0.4642857 0.5238095 0.6000000 0.6000000 + # Order 2 densities should be: + # 0.3333333 0.3809524 0.5000000 0.3571429 0.5000000 + # 0.4642857 0.4642857 0.5238095 0.6000000 0.6000000 # Set manually defined density breaks and indexes breaks <- c(0, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 1.0) density_indexes_o1 <- c(6, 5, 10, 9, 10, 7, 7, 7, 10, 10) @@ -952,8 +1092,10 @@ test_that("netdis_expected_counts works for graphlets up to 4 nodes", { colnames(expected_expected_graphlet_counts_ego_o1) <- graphlet_labels colnames(expected_expected_graphlet_counts_ego_o2) <- graphlet_labels # Set row labels to ego network names - rownames(expected_expected_graphlet_counts_ego_o1) <- rownames(graphlet_counts_ego_o1) - rownames(expected_expected_graphlet_counts_ego_o2) <- rownames(graphlet_counts_ego_o1) + rownames(expected_expected_graphlet_counts_ego_o1) <- + rownames(graphlet_counts_ego_o1) + rownames(expected_expected_graphlet_counts_ego_o2) <- + rownames(graphlet_counts_ego_o1) # Calculate actual output of function under test actual_expected_graphlet_counts_ego_o1 <- @@ -1006,8 +1148,10 @@ test_that("netdis_expected_counts works for graphlets up to 4 nodes", { colnames(expected_expected_graphlet_counts_ego_o1) <- graphlet_labels colnames(expected_expected_graphlet_counts_ego_o2) <- graphlet_labels # Set row labels to ego network names - rownames(expected_expected_graphlet_counts_ego_o1) <- 
rownames(graphlet_counts_ego_o1) - rownames(expected_expected_graphlet_counts_ego_o2) <- rownames(graphlet_counts_ego_o2) + rownames(expected_expected_graphlet_counts_ego_o1) <- + rownames(graphlet_counts_ego_o1) + rownames(expected_expected_graphlet_counts_ego_o2) <- + rownames(graphlet_counts_ego_o2) # Calculate actual output of function under test actual_expected_graphlet_counts_ego_o1 <- @@ -1069,31 +1213,34 @@ test_that("netdis statistic function output matches manually verified result", { expect_equal(expected_netdis_4, actual_netdis_4) expect_equal(expected_netdis_5, actual_netdis_5) }) -test_that("netdis_uptok gives expected netdis result for graphlets up to size k", { - # arbitrary counts of correct size for graphlets up to size 5 - counts_1 <- c( - 11, 11, 13, 9, 12, 10, 14, 9, 13, 10, 10, 7, 9, 12, 6, 12, 9, 12, - 9, 7, 15, 7, 5, 12, 16, 10, 10, 8, 9, 14 - ) - counts_2 <- c( - 12, 11, 6, 10, 15, 7, 10, 8, 7, 7, 7, 13, 9, 14, 7, 12, - 7, 10, 9, 11, 7, 7, 11, 8, 10, 14, 8, 16, 14, 10 - ) +test_that( + "netdis_uptok gives expected netdis result for graphlets up to size k", + { + # arbitrary counts of correct size for graphlets up to size 5 + counts_1 <- c( + 11, 11, 13, 9, 12, 10, 14, 9, 13, 10, 10, 7, 9, 12, 6, 12, 9, 12, + 9, 7, 15, 7, 5, 12, 16, 10, 10, 8, 9, 14 + ) + counts_2 <- c( + 12, 11, 6, 10, 15, 7, 10, 8, 7, 7, 7, 13, 9, 14, 7, 12, + 7, 10, 9, 11, 7, 7, 11, 8, 10, 14, 8, 16, 14, 10 + ) - # add graphlet names - ids <- graphlet_key(5)$id - names(counts_1) <- ids - names(counts_2) <- ids + # add graphlet names + ids <- graphlet_key(5)$id + names(counts_1) <- ids + names(counts_2) <- ids - # manually verified results - expected_netdis <- c(0.03418796, 0.02091792, 0.03826385) - names(expected_netdis) <- c("netdis3", "netdis4", "netdis5") + # manually verified results + expected_netdis <- c(0.03418796, 0.02091792, 0.03826385) + names(expected_netdis) <- c("netdis3", "netdis4", "netdis5") - # check function to test - actual_netdis <- 
netdis_uptok(counts_1, counts_2, 5) + # check function to test + actual_netdis <- netdis_uptok(counts_1, counts_2, 5) - expect_equal(expected_netdis, actual_netdis) -}) + expect_equal(expected_netdis, actual_netdis) + } +) context("Netdis: full calculation pipeline") test_that("netdis_many_to_many gives expected result", { diff --git a/tests/testthat/test_measures_net_emd.R b/tests/testthat/test_measures_net_emd.R index d7aeb68c..bf6bfef9 100644 --- a/tests/testthat/test_measures_net_emd.R +++ b/tests/testthat/test_measures_net_emd.R @@ -1,6 +1,10 @@ self_net_emd <- function(histogram, shift, method) { - netemd_one_to_one(dhists_1 = histogram, dhists_2 = shift_dhist(histogram, shift), method = method) + netemd_one_to_one( + dhists_1 = histogram, + dhists_2 = shift_dhist(histogram, shift), + method = method + ) } expected <- 0 @@ -89,38 +93,46 @@ expect_self_netemd_correct(histogram, return_details = TRUE ) -test_that("net_emd returns 0 when comparing any normal histogram against itself (no offset)", { - num_hists <- 5 - num_bins <- 101 +test_that( + paste( + "net_emd returns 0 when comparing any normal histogram against itself", + "(no offset)" + ), + { + num_hists <- 5 + num_bins <- 101 - mus <- runif(num_hists, -10, 10) - sigmas <- runif(num_hists, 0, 10) + mus <- runif(num_hists, -10, 10) + sigmas <- runif(num_hists, 0, 10) - rand_locations <- function(mu, sigma) { - return(seq(mu - 5 * sigma, mu + 5 * sigma, length.out = num_bins)) - } + rand_locations <- function(mu, sigma) { + return(seq(mu - 5 * sigma, mu + 5 * sigma, length.out = num_bins)) + } - rand_dhists <- purrr::map2(mus, sigmas, function(mu, sigma) { - locations <- rand_locations(mu, sigma) - masses <- dnorm(locations, mean = mu, sd = sigma) - return(dhist(masses = masses, locations = locations)) - }) + rand_dhists <- purrr::map2(mus, sigmas, function(mu, sigma) { + locations <- rand_locations(mu, sigma) + masses <- dnorm(locations, mean = mu, sd = sigma) + return(dhist(masses = masses, locations = 
locations)) + }) - expected <- 0 - actuals_opt <- purrr::map(rand_dhists, function(dhist) { - netemd_one_to_one(dhists_1 = dhist, dhists_2 = dhist, method = "optimise") - }) - purrr::walk(actuals_opt, function(actual) { - expect_equal(actual, expected) - }) + expected <- 0 + actuals_opt <- purrr::map(rand_dhists, function(dhist) { + netemd_one_to_one(dhists_1 = dhist, dhists_2 = dhist, method = "optimise") + }) + purrr::walk(actuals_opt, function(actual) { + expect_equal(actual, expected) + }) - actuals_exhaustive_default <- purrr::map(rand_dhists, function(dhist) { - netemd_one_to_one(dhists_1 = dhist, dhists_2 = dhist, method = "exhaustive") - }) - purrr::walk(actuals_exhaustive_default, function(actual) { - expect_equal(actual, expected) - }) -}) + actuals_exhaustive_default <- purrr::map(rand_dhists, function(dhist) { + netemd_one_to_one( + dhists_1 = dhist, dhists_2 = dhist, method = "exhaustive" + ) + }) + purrr::walk(actuals_exhaustive_default, function(actual) { + expect_equal(actual, expected) + }) + } +) test_that("net_emd returns 0 when comparing any normal histogram randomly offset against itself", { @@ -146,7 +158,11 @@ test_that("net_emd returns 0 when comparing any normal histogram randomly offset netemd_offset_self <- function(dhist, offsets, method) { netemds <- purrr::map_dbl(offsets, function(offset) { - netemd_one_to_one(dhists_1 = dhist, dhists_2 = shift_dhist(dhist, offset), method = method) + netemd_one_to_one( + dhists_1 = dhist, + dhists_2 = shift_dhist(dhist, offset), + method = method + ) }) return(netemds) } @@ -200,7 +216,12 @@ test_that("net_emd returns min_emd = 0 and min_offset = 0 when comparing any expect_self_netemd_correct <- function(histogram, shift, method, return_details = FALSE) { - self_net_emd <- netemd_one_to_one(dhists_1 = histogram, dhists_2 = shift_dhist(histogram, shift), method = method, return_details = return_details) + self_net_emd <- netemd_one_to_one( + dhists_1 = histogram, + dhists_2 = shift_dhist(histogram, 
shift), + method = method, + return_details = return_details + ) loc <- histogram$locations mass <- histogram$masses var <- sum(loc * loc * mass) / sum(mass) - (sum(loc * mass) / sum(mass))^2 @@ -228,31 +249,56 @@ test_that("net_emd returns min_emd = 0 and min_offset = 0 when comparing any }) }) -test_that("net_emd returns analytically derived non-zero solutions for distributions - where the analytical solution is known", { - # Helper functions to create dhists for a given value of "p" - two_bin_dhist <- function(p) { - dhist(locations = c(0, 1), masses = c(p, 1 - p)) - } - three_bin_dhist <- function(p) { - dhist(locations = c(-1, 0, 1), masses = c(0.5 * p * (1 - p), 1 - (p * (1 - p)), 0.5 * p * (1 - p))) - } +test_that( + paste( + "net_emd returns analytically derived non-zero solutions for", + "distributions where the analytical solution is known" + ), + { + # Helper functions to create dhists for a given value of "p" + two_bin_dhist <- function(p) { + dhist(locations = c(0, 1), masses = c(p, 1 - p)) + } + three_bin_dhist <- function(p) { + dhist( + locations = c(-1, 0, 1), + masses = c(0.5 * p * (1 - p), 1 - (p * (1 - p)), 0.5 * p * (1 - p)) + ) + } - # Helper function to test actual vs expected - test_pair <- function(p, expected) { - dhistA <- two_bin_dhist(p) - dhistB <- three_bin_dhist(p) - expect_equal(netemd_one_to_one(dhists_1 = dhistA, dhists_2 = dhistB, method = "exhaustive"), expected, tolerance = 1e-12) - # Even setting the stats::optimise method tolerance to machine double precision, the - # optimised NetEMD is ~1e-09, so set a slightly looser tolerance here - expect_equal(netemd_one_to_one(dhists_1 = dhistA, dhists_2 = dhistB, method = "optimise"), expected, tolerance = 1e-08) - } + # Helper function to test actual vs expected + test_pair <- function(p, expected) { + dhistA <- two_bin_dhist(p) + dhistB <- three_bin_dhist(p) + expect_equal( + netemd_one_to_one( + dhists_1 = dhistA, + dhists_2 = dhistB, + method = "exhaustive" + ), + expected, + 
tolerance = 1e-12 + ) + # Even setting the stats::optimise method tolerance to machine double + # precision, the optimised NetEMD is ~1e-09, so set a slightly looser + # tolerance here + expect_equal( + netemd_one_to_one( + dhists_1 = dhistA, + dhists_2 = dhistB, + method = "optimise" + ), + expected, + tolerance = 1e-08 + ) + } - # Test for p values with analytically calculated NetEMD - test_pair(1 / 2, 1) - test_pair(1 / 3, 1 / sqrt(2)) - test_pair(1 / 5, 1 / 2) -}) + # Test for p values with analytically calculated NetEMD + test_pair(1 / 2, 1) + test_pair(1 / 3, 1 / sqrt(2)) + test_pair(1 / 5, 1 / 2) + } +) context("Measures NetEMD: Virus PPI (EMD)") # EMD and NET_EMD: Virus PPI datasets @@ -405,11 +451,16 @@ test_that("netemd_many_to_many works", { expected_netemd_fn <- function(gdds) { list( netemds = c( - netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$ECL), netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$HSV), - netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$KSHV), netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$VZV), - netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$HSV), netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$KSHV), - netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$VZV), netemd_one_to_one(dhists_1 = gdds$HSV, dhists_2 = gdds$KSHV), - netemd_one_to_one(dhists_1 = gdds$HSV, dhists_2 = gdds$VZV), netemd_one_to_one(dhists_1 = gdds$KSHV, dhists_2 = gdds$VZV) + netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$ECL), + netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$HSV), + netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$KSHV), + netemd_one_to_one(dhists_1 = gdds$EBV, dhists_2 = gdds$VZV), + netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$HSV), + netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$KSHV), + netemd_one_to_one(dhists_1 = gdds$ECL, dhists_2 = gdds$VZV), + netemd_one_to_one(dhists_1 = gdds$HSV, dhists_2 = gdds$KSHV), + netemd_one_to_one(dhists_1 = gdds$HSV, 
dhists_2 = gdds$VZV), + netemd_one_to_one(dhists_1 = gdds$KSHV, dhists_2 = gdds$VZV) ), comp_spec = cross_comparison_spec(gdds) ) diff --git a/tests/testthat/test_orca_interface.R b/tests/testthat/test_orca_interface.R index 5c48dbc4..070ef949 100644 --- a/tests/testthat/test_orca_interface.R +++ b/tests/testthat/test_orca_interface.R @@ -1,48 +1,60 @@ context("ORCA interface: Graph to ORCA edgelist round-trip") test_that("Graph to indexed edge list round trip conversion works", { data_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") - g_orig <- igraph::read_graph(file = file.path(data_dir, "EBV.txt"), format = "ncol") + g_orig <- igraph::read_graph( + file = file.path(data_dir, "EBV.txt"), + format = "ncol" + ) g_rtrip <- netdist::indexed_edges_to_graph(graph_to_indexed_edges(g_orig)) - expect_true(all.equal(igraph::get.edgelist(g_orig), igraph::get.edgelist(g_orig))) + expect_true(all.equal( + igraph::get.edgelist(g_orig), + igraph::get.edgelist(g_orig) + )) }) context("ORCA interface: Graphlet key") -test_that("graphlet_key gives correct output for all supported max graphlet sizes", { - correct_graphlet_key_2 <- list(max_nodes = 2, id = c("G0"), node_count = c(2)) - correct_graphlet_key_3 <- list( - max_nodes = 3, id = c("G0", "G1", "G2"), - node_count = c(2, 3, 3) - ) - correct_graphlet_key_4 <- list( - max_nodes = 4, - id = c( - "G0", "G1", "G2", "G3", "G4", "G5", "G6", - "G7", "G8" - ), - node_count = c(2, 3, 3, 4, 4, 4, 4, 4, 4) - ) - correct_graphlet_key_5 <- list( - max_nodes = 5, - id = c( - "G0", "G1", "G2", "G3", "G4", "G5", "G6", - "G7", "G8", "G9", "G10", "G11", "G12", - "G13", "G14", "G15", "G16", "G17", - "G18", "G19", "G20", "G21", "G22", - "G23", "G24", "G25", "G26", "G27", - "G28", "G29" - ), - node_count = c( - 2, 3, 3, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5 +test_that( + "graphlet_key gives correct output for all supported max graphlet sizes", + { + correct_graphlet_key_2 <- 
list( + max_nodes = 2, id = c("G0"), + node_count = c(2) ) - ) - expect_equal(graphlet_key(2), correct_graphlet_key_2) - expect_equal(graphlet_key(3), correct_graphlet_key_3) - expect_equal(graphlet_key(4), correct_graphlet_key_4) - expect_equal(graphlet_key(5), correct_graphlet_key_5) -}) + correct_graphlet_key_3 <- list( + max_nodes = 3, id = c("G0", "G1", "G2"), + node_count = c(2, 3, 3) + ) + correct_graphlet_key_4 <- list( + max_nodes = 4, + id = c( + "G0", "G1", "G2", "G3", "G4", "G5", "G6", + "G7", "G8" + ), + node_count = c(2, 3, 3, 4, 4, 4, 4, 4, 4) + ) + correct_graphlet_key_5 <- list( + max_nodes = 5, + id = c( + "G0", "G1", "G2", "G3", "G4", "G5", "G6", + "G7", "G8", "G9", "G10", "G11", "G12", + "G13", "G14", "G15", "G16", "G17", + "G18", "G19", "G20", "G21", "G22", + "G23", "G24", "G25", "G26", "G27", + "G28", "G29" + ), + node_count = c( + 2, 3, 3, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5 + ) + ) + expect_equal(graphlet_key(2), correct_graphlet_key_2) + expect_equal(graphlet_key(3), correct_graphlet_key_3) + expect_equal(graphlet_key(4), correct_graphlet_key_4) + expect_equal(graphlet_key(5), correct_graphlet_key_5) + } +) test_that("graphlet_key gives error for unsupported max graphlet sizes", { max_size_too_low <- c(1, 0, -1, -2, -3, -4, -5, -6) @@ -60,53 +72,56 @@ test_that("graphlet_key gives error for unsupported max graphlet sizes", { }) context("ORCA interface: Orbit key") -test_that("orbit_key gives correct output for all supported max graphlet sizes", { - correct_orbit_key_2 <- list(max_nodes = 2, id = c("O0"), node_count = c(2)) - correct_orbit_key_3 <- list( - max_nodes = 3, id = c("O0", "O1", "O2", "O3"), - node_count = c(2, 3, 3, 3) - ) - correct_orbit_key_4 <- list( - max_nodes = 4, - id = c( - "O0", "O1", "O2", "O3", "O4", "O5", "O6", "O7", "O8", "O9", - "O10", "O11", "O12", "O13", "O14" - ), - node_count = c( - 2, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 +test_that( + "orbit_key gives 
correct output for all supported max graphlet sizes", + { + correct_orbit_key_2 <- list(max_nodes = 2, id = c("O0"), node_count = c(2)) + correct_orbit_key_3 <- list( + max_nodes = 3, id = c("O0", "O1", "O2", "O3"), + node_count = c(2, 3, 3, 3) ) - ) - correct_orbit_key_5 <- list( - max_nodes = 5, - id = c( - "O0", "O1", "O2", "O3", "O4", "O5", "O6", "O7", "O8", "O9", - "O10", "O11", "O12", "O13", "O14", "O15", "O16", "O17", - "O18", "O19", "O20", "O21", "O22", - "O23", "O24", "O25", "O26", "O27", "O28", "O29", - "O30", "O31", "O32", "O33", "O34", "O35", "O36", "O37", - "O38", "O39", "O40", "O41", "O42", "O43", "O44", "O45", - "O46", "O47", "O48", "O49", "O50", "O51", "O52", "O53", - "O54", "O55", "O56", "O57", "O58", "O59", "O60", "O61", - "O62", "O63", "O64", "O65", "O66", "O67", "O68", "O69", - "O70", "O71", "O72" - ), - node_count = c( - 2, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5 + correct_orbit_key_4 <- list( + max_nodes = 4, + id = c( + "O0", "O1", "O2", "O3", "O4", "O5", "O6", "O7", "O8", "O9", + "O10", "O11", "O12", "O13", "O14" + ), + node_count = c( + 2, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 + ) ) - ) - expect_equal(orbit_key(2), correct_orbit_key_2) - expect_equal(orbit_key(3), correct_orbit_key_3) - expect_equal(orbit_key(4), correct_orbit_key_4) - expect_equal(orbit_key(5), correct_orbit_key_5) -}) + correct_orbit_key_5 <- list( + max_nodes = 5, + id = c( + "O0", "O1", "O2", "O3", "O4", "O5", "O6", "O7", "O8", "O9", + "O10", "O11", "O12", "O13", "O14", "O15", "O16", "O17", + "O18", "O19", "O20", "O21", "O22", + "O23", "O24", "O25", "O26", "O27", "O28", "O29", + "O30", "O31", "O32", "O33", "O34", "O35", "O36", "O37", + "O38", "O39", "O40", "O41", "O42", "O43", "O44", "O45", + "O46", "O47", "O48", "O49", "O50", "O51", "O52", "O53", + "O54", "O55", "O56", "O57", 
"O58", "O59", "O60", "O61", + "O62", "O63", "O64", "O65", "O66", "O67", "O68", "O69", + "O70", "O71", "O72" + ), + node_count = c( + 2, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5 + ) + ) + expect_equal(orbit_key(2), correct_orbit_key_2) + expect_equal(orbit_key(3), correct_orbit_key_3) + expect_equal(orbit_key(4), correct_orbit_key_4) + expect_equal(orbit_key(5), correct_orbit_key_5) + } +) context("ORCA interface: Graph cross comparison") test_that("cross_comparison_spec works for virus PPI data", { @@ -143,7 +158,10 @@ test_that("cross_comparison_spec works for virus PPI data", { context("ORCA interface: Orbit count wrapper") test_that("Single and zero node graphs are gracefully handled", { - single_node_graph <- igraph::graph_from_adjacency_matrix(0, mode = "undirected") + single_node_graph <- igraph::graph_from_adjacency_matrix( + 0, + mode = "undirected" + ) zero_node_graph <- igraph::delete.vertices(single_node_graph, 1) names4 <- c( "O0", "O1", "O2", "O3", "O4", "O5", "O6", "O7", "O8", "O9", @@ -248,7 +266,10 @@ test_that("simplify_graph works", { # 1a. Loop removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_loops(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_loops(adj_mat), + mode = "directed" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -259,7 +280,10 @@ test_that("simplify_graph works", { # 1b. Multiple edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_multiples(adj_mat), + mode = "directed" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -270,7 +294,10 @@ test_that("simplify_graph works", { # 1c. 
Isolate edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(adj_mat), + mode = "directed" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -281,7 +308,10 @@ test_that("simplify_graph works", { # 1ab. Loop + multiple edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(remove_loops(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_multiples(remove_loops(adj_mat)), + mode = "directed" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -292,7 +322,10 @@ test_that("simplify_graph works", { # 1ac. Loop + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_loops(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_loops(adj_mat)), + mode = "directed" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -303,7 +336,10 @@ test_that("simplify_graph works", { # 1bc. Multiple + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(adj_mat)), + mode = "directed" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -314,7 +350,10 @@ test_that("simplify_graph works", { # 1abc. 
Loop + multiple + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(remove_loops(adj_mat))), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(remove_loops(adj_mat))), + mode = "directed" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -339,7 +378,10 @@ test_that("simplify_graph works", { # edges where nodes are mutually connected in adjacency matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(adj_mat), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_multiples(adj_mat), + mode = "max" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -350,7 +392,10 @@ test_that("simplify_graph works", { # 2c. Isolate edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(adj_mat), mode = "plus") + igraph::graph_from_adjacency_matrix( + remove_isolates(adj_mat), + mode = "plus" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -358,11 +403,14 @@ test_that("simplify_graph works", { remove_multiple = FALSE, remove_isolates = TRUE )) ) - # 2ab. Loop + multiple edge removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2ab. Loop + multiple edge removal (use mode = "max" to avoid generating + # multiple edges where nodes are mutually connected in adjacency matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(remove_loops(adj_mat)), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_multiples(remove_loops(adj_mat)), + mode = "max" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -373,7 +421,10 @@ test_that("simplify_graph works", { # 2ac. 
Loop + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_loops(adj_mat)), mode = "plus") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_loops(adj_mat)), + mode = "plus" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -381,11 +432,14 @@ test_that("simplify_graph works", { remove_multiple = FALSE, remove_isolates = TRUE )) ) - # 2bc. Multiple + isolate removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2bc. Multiple + isolate removal (use mode = "max" to avoid generating + # multiple edges where nodes are mutually connected in adjacency matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(adj_mat)), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(adj_mat)), + mode = "max" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -393,11 +447,15 @@ test_that("simplify_graph works", { remove_multiple = TRUE, remove_isolates = TRUE )) ) - # 2abc. Loop + multiple + isolate removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2abc. Loop + multiple + isolate removal (use mode = "max" to avoid + # generating multiple edges where nodes are mutually connected in adjacency + # matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(remove_loops(adj_mat))), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(remove_loops(adj_mat))), + mode = "max" + ) ), igraph::as_adjacency_matrix(simplify_graph( graph, @@ -464,7 +522,10 @@ test_that("gdd simplifies works", { # 1a. 
Loop removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_loops(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_loops(adj_mat), + mode = "directed" + ) ), gdd(simplify_graph( graph, @@ -475,7 +536,10 @@ test_that("gdd simplifies works", { # 1b. Multiple edge removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_multiples(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_multiples(adj_mat), + mode = "directed" + ) ), gdd(simplify_graph( graph, @@ -486,7 +550,10 @@ test_that("gdd simplifies works", { # 1c. Isolate edge removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(adj_mat), + mode = "directed" + ) ), gdd(simplify_graph( graph, @@ -497,7 +564,10 @@ test_that("gdd simplifies works", { # 1ab. Loop + multiple edge removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_multiples(remove_loops(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_multiples(remove_loops(adj_mat)), + mode = "directed" + ) ), gdd(simplify_graph( graph, @@ -508,7 +578,10 @@ test_that("gdd simplifies works", { # 1ac. Loop + isolate removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_loops(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_loops(adj_mat)), + mode = "directed" + ) ), gdd(simplify_graph( graph, @@ -519,7 +592,10 @@ test_that("gdd simplifies works", { # 1bc. Multiple + isolate removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(adj_mat)), + mode = "directed" + ) ), gdd(simplify_graph( graph, @@ -530,7 +606,10 @@ test_that("gdd simplifies works", { # 1abc. 
Loop + multiple + isolate removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(remove_loops(adj_mat))), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(remove_loops(adj_mat))), + mode = "directed" + ) ), gdd(simplify_graph( graph, @@ -555,7 +634,10 @@ test_that("gdd simplifies works", { # edges where nodes are mutually connected in adjacency matrix) expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_multiples(adj_mat), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_multiples(adj_mat), + mode = "max" + ) ), gdd(simplify_graph( graph, @@ -566,7 +648,10 @@ test_that("gdd simplifies works", { # 2c. Isolate edge removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(adj_mat), mode = "plus") + igraph::graph_from_adjacency_matrix( + remove_isolates(adj_mat), + mode = "plus" + ) ), gdd(simplify_graph( graph, @@ -574,11 +659,14 @@ test_that("gdd simplifies works", { remove_multiple = FALSE, remove_isolates = TRUE )) ) - # 2ab. Loop + multiple edge removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2ab. Loop + multiple edge removal (use mode = "max" to avoid generating + # multiple edges where nodes are mutually connected in adjacency matrix) expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_multiples(remove_loops(adj_mat)), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_multiples(remove_loops(adj_mat)), + mode = "max" + ) ), gdd(simplify_graph( graph, @@ -589,7 +677,10 @@ test_that("gdd simplifies works", { # 2ac. 
Loop + isolate removal expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_loops(adj_mat)), mode = "plus") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_loops(adj_mat)), + mode = "plus" + ) ), gdd(simplify_graph( graph, @@ -597,11 +688,14 @@ test_that("gdd simplifies works", { remove_multiple = FALSE, remove_isolates = TRUE )) ) - # 2bc. Multiple + isolate removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2bc. Multiple + isolate removal (use mode = "max" to avoid generating + # multiple edges where nodes are mutually connected in adjacency matrix) expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(adj_mat)), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(adj_mat)), + mode = "max" + ) ), gdd(simplify_graph( graph, @@ -609,11 +703,15 @@ test_that("gdd simplifies works", { remove_multiple = TRUE, remove_isolates = TRUE )) ) - # 2abc. Loop + multiple + isolate removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2abc. 
Loop + multiple + isolate removal (use mode = "max" to avoid + # generating multiple edges where nodes are mutually connected in adjacency + # matrix) expect_equal( gdd( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(remove_loops(adj_mat))), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(remove_loops(adj_mat))), + mode = "max" + ) ), gdd(simplify_graph( graph, @@ -658,7 +756,10 @@ test_that("Features to Histograms Test", { expect_equal(res[[1]]$locations, c(10^-10, 10^-9, 10^-8, 10^-2, 10^3)) expect_equal(res[[1]]$masses, c(1, 1, 2, 1, 1)) # irrational - c1 <- matrix(c(pi, sqrt(2), sqrt(2) / pi, sqrt(3), sqrt(2), sqrt(2) / pi), nrow = 6) + c1 <- matrix( + c(pi, sqrt(2), sqrt(2) / pi, sqrt(3), sqrt(2), sqrt(2) / pi), + nrow = 6 + ) res <- graph_features_to_histograms(c1) expect_equal(res[[1]]$locations, c(sqrt(2) / pi, sqrt(2), sqrt(3), pi)) expect_equal(res[[1]]$masses, c(2, 2, 1, 1)) @@ -736,7 +837,10 @@ test_that("read_simple_graph works", { # 1a. Loop removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_loops(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_loops(adj_mat), + mode = "directed" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -747,7 +851,10 @@ test_that("read_simple_graph works", { # 1b. Multiple edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_multiples(adj_mat), + mode = "directed" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -758,7 +865,10 @@ test_that("read_simple_graph works", { # 1c. 
Isolate edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(adj_mat), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(adj_mat), + mode = "directed" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -769,7 +879,10 @@ test_that("read_simple_graph works", { # 1ab. Loop + multiple edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(remove_loops(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_multiples(remove_loops(adj_mat)), + mode = "directed" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -780,7 +893,10 @@ test_that("read_simple_graph works", { # 1ac. Loop + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_loops(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_loops(adj_mat)), + mode = "directed" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -791,7 +907,10 @@ test_that("read_simple_graph works", { # 1bc. Multiple + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(adj_mat)), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(adj_mat)), + mode = "directed" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -802,7 +921,10 @@ test_that("read_simple_graph works", { # 1abc. 
Loop + multiple + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(remove_loops(adj_mat))), mode = "directed") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(remove_loops(adj_mat))), + mode = "directed" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -827,7 +949,10 @@ test_that("read_simple_graph works", { # edges where nodes are mutually connected in adjacency matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(adj_mat), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_multiples(adj_mat), + mode = "max" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -838,7 +963,10 @@ test_that("read_simple_graph works", { # 2c. Isolate edge removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(adj_mat), mode = "plus") + igraph::graph_from_adjacency_matrix( + remove_isolates(adj_mat), + mode = "plus" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -846,11 +974,14 @@ test_that("read_simple_graph works", { remove_multiple = FALSE, remove_isolates = TRUE )) ) - # 2ab. Loop + multiple edge removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2ab. Loop + multiple edge removal (use mode = "max" to avoid generating + # multiple edges where nodes are mutually connected in adjacency matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_multiples(remove_loops(adj_mat)), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_multiples(remove_loops(adj_mat)), + mode = "max" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -861,7 +992,10 @@ test_that("read_simple_graph works", { # 2ac. 
Loop + isolate removal expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_loops(adj_mat)), mode = "plus") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_loops(adj_mat)), + mode = "plus" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -869,11 +1003,14 @@ test_that("read_simple_graph works", { remove_multiple = FALSE, remove_isolates = TRUE )) ) - # 2bc. Multiple + isolate removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2bc. Multiple + isolate removal (use mode = "max" to avoid generating + # multiple edges where nodes are mutually connected in adjacency matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(adj_mat)), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(adj_mat)), + mode = "max" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -881,11 +1018,15 @@ test_that("read_simple_graph works", { remove_multiple = TRUE, remove_isolates = TRUE )) ) - # 2abc. Loop + multiple + isolate removal (use mode = "max" to avoid generating multiple - # edges where nodes are mutually connected in adjacency matrix) + # 2abc. 
Loop + multiple + isolate removal (use mode = "max" to avoid + # generating multiple edges where nodes are mutually connected in adjacency + # matrix) expect_equal( igraph::as_adjacency_matrix( - igraph::graph_from_adjacency_matrix(remove_isolates(remove_multiples(remove_loops(adj_mat))), mode = "max") + igraph::graph_from_adjacency_matrix( + remove_isolates(remove_multiples(remove_loops(adj_mat))), + mode = "max" + ) ), igraph::as_adjacency_matrix(read_simple_graph( file = path, format = format, @@ -912,10 +1053,26 @@ test_that("read_simple_files works (all files in a directory)", { # Save graphs to temp directory format <- "graphml" base_dir <- tempdir() - igraph::write_graph(graph, file = file.path(base_dir, "oltw54387eNS_1.txt"), format = format) - igraph::write_graph(graph, file = file.path(base_dir, "oltw54387eNS_2.txt"), format = format) - igraph::write_graph(graph, file = file.path(base_dir, "oltw54387eNS_3.txt"), format = format) - igraph::write_graph(graph, file = file.path(base_dir, "oltw54387eNS_4.txt"), format = format) + igraph::write_graph( + graph, + file = file.path(base_dir, "oltw54387eNS_1.txt"), + format = format + ) + igraph::write_graph( + graph, + file = file.path(base_dir, "oltw54387eNS_2.txt"), + format = format + ) + igraph::write_graph( + graph, + file = file.path(base_dir, "oltw54387eNS_3.txt"), + format = format + ) + igraph::write_graph( + graph, + file = file.path(base_dir, "oltw54387eNS_4.txt"), + format = format + ) # Helper functions to amend adjacency matrix to generate simplified graphs remove_loops <- function(adj_mat) { @@ -967,8 +1124,9 @@ test_that("orbit_to_graphlet_counts summation works", { edges <- graph_to_indexed_edges(graph) orbit_counts_4 <- orca::count4(edges) orbit_counts_5 <- orca::count5(edges) - # Define orbit indexes belonging to each graphlet using the xero-based indexing - # from the journal papers, adding one to conver tot he one-based indexing of R + # Define orbit indexes belonging to each graphlet using 
the xero-based + # indexing from the journal papers, adding one to conver to the one-based + # indexing of R g0_indexes <- c(0) + 1 g1_indexes <- c(1:2) + 1 g2_indexes <- c(3) + 1 @@ -1047,8 +1205,8 @@ test_that("orbit_to_graphlet_counts summation works", { "G20", "G21", "G22", "G23", "G24", "G25", "G26", "G27", "G28", "G29" ) - # Define epected graphlet count matrix for graphlets up to 4 nodes by selecting - # a subset of the matrix for graphlets up to 5 nodes + # Define epected graphlet count matrix for graphlets up to 4 nodes by + # selecting a subset of the matrix for graphlets up to 5 nodes expected_graphlet_counts_4 <- expected_graphlet_counts_5[, 1:9] # Calculate actual graphlet counts from functions under test actual_graphlet_counts_4 <- orbit_to_graphlet_counts(orbit_counts_4) @@ -1059,137 +1217,142 @@ test_that("orbit_to_graphlet_counts summation works", { }) context("ORCA interface: Named ego networks") -test_that("make_named_ego_graph labels each ego-network with the correct node name", { - # Helper function to sort edgelists in consistent order - sort_edge_list <- function(edge_list) { - edge_list[order(edge_list[, 1], edge_list[, 2], decreasing = FALSE), ] - } - # Set up a small sample network with at least one ego-network that contains - # at least one of each graphlets - elist <- rbind( - c("n1", "n2"), - c("n2", "n3"), - c("n1", "n4"), - c("n2", "n5"), - c("n1", "n6"), - c("n1", "n7"), - c("n2", "n4"), - c("n4", "n6"), - c("n6", "n8"), - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - graph <- igraph::graph_from_edgelist(elist, directed = FALSE) - # The expectation below is based on igraph::graph_from_edgelist adding nodes - # in the order they appear in the edge list, and igraph::V returning them - # in this same order - expected_node_names <- c("n1", "n2", "n3", "n4", "n5", "n6", "n7", "n8", "n9", "n10") +test_that( + "make_named_ego_graph labels each ego-network with the correct node 
name", + { + # Helper function to sort edgelists in consistent order + sort_edge_list <- function(edge_list) { + edge_list[order(edge_list[, 1], edge_list[, 2], decreasing = FALSE), ] + } + # Set up a small sample network with at least one ego-network that contains + # at least one of each graphlets + elist <- rbind( + c("n1", "n2"), + c("n2", "n3"), + c("n1", "n4"), + c("n2", "n5"), + c("n1", "n6"), + c("n1", "n7"), + c("n2", "n4"), + c("n4", "n6"), + c("n6", "n8"), + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + graph <- igraph::graph_from_edgelist(elist, directed = FALSE) + # The expectation below is based on igraph::graph_from_edgelist adding nodes + # in the order they appear in the edge list, and igraph::V returning them + # in this same order + expected_node_names <- c( + "n1", "n2", "n3", "n4", "n5", "n6", "n7", "n8", "n9", "n10" + ) - # Expected edgelists for ego networks of order 1 - expected_ego_elist_n1_o1 <- rbind( - c("n1", "n2"), - c("n1", "n4"), - c("n1", "n6"), - c("n1", "n7"), - c("n2", "n4"), - c("n4", "n6") - ) - expected_ego_elist_n2_o1 <- rbind( - c("n1", "n2"), - c("n1", "n4"), - c("n2", "n3"), - c("n2", "n4"), - c("n2", "n5") - ) - expected_ego_elist_n3_o1 <- rbind( - c("n2", "n3") - ) - expected_ego_elist_n4_o1 <- rbind( - c("n1", "n2"), - c("n1", "n4"), - c("n1", "n6"), - c("n2", "n4"), - c("n4", "n6") - ) - expected_ego_elist_n5_o1 <- rbind( - c("n2", "n5") - ) - expected_ego_elist_n6_o1 <- rbind( - c("n1", "n4"), - c("n1", "n6"), - c("n4", "n6"), - c("n6", "n8") - ) - expected_ego_elist_n7_o1 <- rbind( - c("n1", "n7"), - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - expected_ego_elist_n8_o1 <- rbind( - c("n6", "n8"), - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - expected_ego_elist_n9_o1 <- rbind( - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - 
c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - expected_ego_elist_n10_o1 <- rbind( - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) + # Expected edgelists for ego networks of order 1 + expected_ego_elist_n1_o1 <- rbind( + c("n1", "n2"), + c("n1", "n4"), + c("n1", "n6"), + c("n1", "n7"), + c("n2", "n4"), + c("n4", "n6") + ) + expected_ego_elist_n2_o1 <- rbind( + c("n1", "n2"), + c("n1", "n4"), + c("n2", "n3"), + c("n2", "n4"), + c("n2", "n5") + ) + expected_ego_elist_n3_o1 <- rbind( + c("n2", "n3") + ) + expected_ego_elist_n4_o1 <- rbind( + c("n1", "n2"), + c("n1", "n4"), + c("n1", "n6"), + c("n2", "n4"), + c("n4", "n6") + ) + expected_ego_elist_n5_o1 <- rbind( + c("n2", "n5") + ) + expected_ego_elist_n6_o1 <- rbind( + c("n1", "n4"), + c("n1", "n6"), + c("n4", "n6"), + c("n6", "n8") + ) + expected_ego_elist_n7_o1 <- rbind( + c("n1", "n7"), + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + expected_ego_elist_n8_o1 <- rbind( + c("n6", "n8"), + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + expected_ego_elist_n9_o1 <- rbind( + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + expected_ego_elist_n10_o1 <- rbind( + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) - # Test ego-networks of order 1. - # We compare edgelists as igraphs do not implement comparison - order <- 1 - min_ego_nodes <- 0 - min_ego_edges <- 0 + # Test ego-networks of order 1. 
+ # We compare edgelists as igraphs do not implement comparison + order <- 1 + min_ego_nodes <- 0 + min_ego_edges <- 0 - expected_ego_elists_o1 <- list( - n1 = dplyr::arrange(data.frame(expected_ego_elist_n1_o1), X1, X2), - n2 = dplyr::arrange(data.frame(expected_ego_elist_n2_o1), X1, X2), - n3 = dplyr::arrange(data.frame(expected_ego_elist_n3_o1), X1, X2), - n4 = dplyr::arrange(data.frame(expected_ego_elist_n4_o1), X1, X2), - n5 = dplyr::arrange(data.frame(expected_ego_elist_n5_o1), X1, X2), - n6 = dplyr::arrange(data.frame(expected_ego_elist_n6_o1), X1, X2), - n7 = dplyr::arrange(data.frame(expected_ego_elist_n7_o1), X1, X2), - n8 = dplyr::arrange(data.frame(expected_ego_elist_n8_o1), X1, X2), - n9 = dplyr::arrange(data.frame(expected_ego_elist_n9_o1), X1, X2), - n10 = dplyr::arrange(data.frame(expected_ego_elist_n10_o1), X1, X2) - ) - # Generate actual ego-networks and convert to edge lists for comparison - actual_ego_elists_o1 <- - purrr::map( - make_named_ego_graph(graph, order, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges - ), - function(g) { - dplyr::arrange(data.frame(igraph::as_edgelist(g)), X1, X2) - } + expected_ego_elists_o1 <- list( + n1 = dplyr::arrange(data.frame(expected_ego_elist_n1_o1), X1, X2), + n2 = dplyr::arrange(data.frame(expected_ego_elist_n2_o1), X1, X2), + n3 = dplyr::arrange(data.frame(expected_ego_elist_n3_o1), X1, X2), + n4 = dplyr::arrange(data.frame(expected_ego_elist_n4_o1), X1, X2), + n5 = dplyr::arrange(data.frame(expected_ego_elist_n5_o1), X1, X2), + n6 = dplyr::arrange(data.frame(expected_ego_elist_n6_o1), X1, X2), + n7 = dplyr::arrange(data.frame(expected_ego_elist_n7_o1), X1, X2), + n8 = dplyr::arrange(data.frame(expected_ego_elist_n8_o1), X1, X2), + n9 = dplyr::arrange(data.frame(expected_ego_elist_n9_o1), X1, X2), + n10 = dplyr::arrange(data.frame(expected_ego_elist_n10_o1), X1, X2) ) - expect_equal(actual_ego_elists_o1, expected_ego_elists_o1) -}) + # Generate actual ego-networks and convert to edge 
lists for comparison + actual_ego_elists_o1 <- + purrr::map( + make_named_ego_graph(graph, order, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges + ), + function(g) { + dplyr::arrange(data.frame(igraph::as_edgelist(g)), X1, X2) + } + ) + expect_equal(actual_ego_elists_o1, expected_ego_elists_o1) + } +) context("ORCA interface: Graphlet counts") test_that("count_graphlets_for_graph works", { @@ -1215,7 +1378,9 @@ test_that("count_graphlets_for_graph works", { graph <- igraph::graph_from_edgelist(elist, directed = FALSE) # Setgraphlet labels to use for names in expected counts - graphlet_labels <- c("N", "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") + graphlet_labels <- c( + "N", "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8" + ) # Manually verified graphlet counts expected_counts <- c(10, 15, 18, 6, 21, 3, 1, 11, 1, 1) @@ -1227,534 +1392,287 @@ test_that("count_graphlets_for_graph works", { }) context("ORCA interface: Ego-network graphlet counts") -test_that("count_graphlets_ego: Ego-network 4-node graphlet counts match manually verified totals for test graph", { - # Set up a small sample network with at least one ego-network that contains - # at least one of each graphlets - elist <- rbind( - c("n1", "n2"), - c("n2", "n3"), - c("n1", "n4"), - c("n2", "n5"), - c("n1", "n6"), - c("n1", "n7"), - c("n2", "n4"), - c("n4", "n6"), - c("n6", "n8"), - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - graph <- igraph::graph_from_edgelist(elist, directed = FALSE) +test_that( + paste( + "count_graphlets_ego: Ego-network 4-node graphlet counts match manually", + "verified totals for test graph" + ), + { + # Set up a small sample network with at least one ego-network that contains + # at least one of each graphlets + elist <- rbind( + c("n1", "n2"), + c("n2", "n3"), + c("n1", "n4"), + c("n2", "n5"), + c("n1", "n6"), + c("n1", "n7"), + c("n2", "n4"), + c("n4", "n6"), + c("n6", "n8"), + 
c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + graph <- igraph::graph_from_edgelist(elist, directed = FALSE) - # Set node and graphlet labels to use for row and col names in expected counts - node_labels <- igraph::V(graph)$name - graphlet_labels <- c("N", "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") + # Set node and graphlet labels to use for row and col names in expected + # counts + node_labels <- igraph::V(graph)$name + graphlet_labels <- c( + "N", "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8" + ) - max_graphlet_size <- 4 - graphlet_key <- graphlet_key(max_graphlet_size) - k <- graphlet_key$node_count - # Set manually verified counts - # 1-step ego networks - expected_counts_order_1 <- rbind( - c(5, 6, 5, 2, 0, 1, 0, 2, 1, 0), - c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), - c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), - c(4, 5, 2, 2, 0, 0, 0, 0, 1, 0), - c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), - c(4, 4, 2, 1, 0, 0, 0, 1, 0, 0), - c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), - c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), - c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1), - c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1) - ) - rownames(expected_counts_order_1) <- node_labels - colnames(expected_counts_order_1) <- graphlet_labels - # 2-step ego networks - expected_counts_order_2 <- rbind( - c(10, 15, 18, 6, 21, 3, 1, 11, 1, 1), - c(7, 8, 10, 2, 6, 3, 0, 4, 1, 0), - c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), - c(8, 10, 14, 2, 11, 3, 1, 5, 1, 0), - c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), - c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), - c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), - c(7, 11, 10, 5, 10, 0, 1, 8, 0, 1), - c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1), - c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1) - ) - rownames(expected_counts_order_2) <- node_labels - colnames(expected_counts_order_2) <- graphlet_labels + max_graphlet_size <- 4 + graphlet_key <- graphlet_key(max_graphlet_size) + k <- graphlet_key$node_count + # Set manually verified counts + # 1-step ego networks + expected_counts_order_1 <- rbind( + c(5, 6, 5, 2, 0, 1, 
0, 2, 1, 0), + c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), + c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), + c(4, 5, 2, 2, 0, 0, 0, 0, 1, 0), + c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), + c(4, 4, 2, 1, 0, 0, 0, 1, 0, 0), + c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), + c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), + c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1), + c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1) + ) + rownames(expected_counts_order_1) <- node_labels + colnames(expected_counts_order_1) <- graphlet_labels + # 2-step ego networks + expected_counts_order_2 <- rbind( + c(10, 15, 18, 6, 21, 3, 1, 11, 1, 1), + c(7, 8, 10, 2, 6, 3, 0, 4, 1, 0), + c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), + c(8, 10, 14, 2, 11, 3, 1, 5, 1, 0), + c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), + c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), + c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), + c(7, 11, 10, 5, 10, 0, 1, 8, 0, 1), + c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1), + c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1) + ) + rownames(expected_counts_order_2) <- node_labels + colnames(expected_counts_order_2) <- graphlet_labels - # Count graphlets in each ego network of the graph with only counts requested - min_ego_nodes <- 0 - min_ego_edges <- 0 + # Count graphlets in each ego network of the graph with only counts + # requested + min_ego_nodes <- 0 + min_ego_edges <- 0 - actual_counts_order_1 <- - count_graphlets_ego(graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = 1, + actual_counts_order_1 <- + count_graphlets_ego(graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = 1, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges + ) + actual_counts_order_2 <- + count_graphlets_ego(graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = 2, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges + ) + + # Test that actual counts match expected with only counts requested + # (default) + expect_equal(actual_counts_order_1, expected_counts_order_1) + expect_equal(actual_counts_order_2, expected_counts_order_2) + + # Test that actual and returned ego networks match 
expected + # 1. Define expected + expected_ego_networks_order_1 <- make_named_ego_graph(graph, + order = 1, min_ego_nodes = min_ego_nodes, min_ego_edges = min_ego_edges ) - actual_counts_order_2 <- - count_graphlets_ego(graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = 2, + expected_ego_networks_order_2 <- make_named_ego_graph(graph, + order = 2, min_ego_nodes = min_ego_nodes, min_ego_edges = min_ego_edges ) - - # Test that actual counts match expected with only counts requested (default) - expect_equal(actual_counts_order_1, expected_counts_order_1) - expect_equal(actual_counts_order_2, expected_counts_order_2) - - # Test that actual and returned ego networks match expected - # 1. Define expected - expected_ego_networks_order_1 <- make_named_ego_graph(graph, - order = 1, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges - ) - expected_ego_networks_order_2 <- make_named_ego_graph(graph, - order = 2, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges - ) - expected_counts_with_networks_order_1 <- - list( - graphlet_counts = expected_counts_order_1, - ego_networks = expected_ego_networks_order_1 - ) - expected_counts_with_networks_order_2 <- - list( - graphlet_counts = expected_counts_order_2, - ego_networks = expected_ego_networks_order_2 - ) - # 2. Calculate actual - actual_counts_with_networks_order_1 <- - count_graphlets_ego(graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = 1, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges, - return_ego_networks = TRUE + expected_counts_with_networks_order_1 <- + list( + graphlet_counts = expected_counts_order_1, + ego_networks = expected_ego_networks_order_1 + ) + expected_counts_with_networks_order_2 <- + list( + graphlet_counts = expected_counts_order_2, + ego_networks = expected_ego_networks_order_2 + ) + # 2. 
Calculate actual + actual_counts_with_networks_order_1 <- + count_graphlets_ego(graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = 1, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges, + return_ego_networks = TRUE + ) + actual_counts_with_networks_order_2 <- + count_graphlets_ego(graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = 2, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges, + return_ego_networks = TRUE + ) + # Test that actual counts match expected with ego-networks requested + expect_equal( + actual_counts_with_networks_order_1$graphlet_counts, + expected_counts_order_1 ) - actual_counts_with_networks_order_2 <- - count_graphlets_ego(graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = 2, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges, - return_ego_networks = TRUE + expect_equal( + actual_counts_with_networks_order_2$graphlet_counts, + expected_counts_order_2 ) - # Test that actual counts match expected with ego-networks requested - expect_equal(actual_counts_with_networks_order_1$graphlet_counts, expected_counts_order_1) - expect_equal(actual_counts_with_networks_order_2$graphlet_counts, expected_counts_order_2) - # 3. Compare - # Comparison is not implemented for igraph objects, so convert all igraphs to - # indexed edge list and then compare. Do in-situ replacement of igraphs with - # indexed edge lists to ensure we are checking full properties of returned - # objects (i.e. named lists with matching elements). - # 3a. 
Convert expected and actual ego networks from igraphs to indexed edges - expected_counts_with_networks_order_1$ego_networks <- - purrr::map( - expected_counts_with_networks_order_1$ego_networks, - graph_to_indexed_edges - ) - expected_counts_with_networks_order_2$ego_networks <- - purrr::map( - expected_counts_with_networks_order_2$ego_networks, - graph_to_indexed_edges - ) - actual_counts_with_networks_order_1$ego_networks <- - purrr::map( - actual_counts_with_networks_order_1$ego_networks, - graph_to_indexed_edges + # 3. Compare + # Comparison is not implemented for igraph objects, so convert all igraphs + # to indexed edge list and then compare. Do in-situ replacement of igraphs + # with indexed edge lists to ensure we are checking full properties of + # returned objects (i.e. named lists with matching elements). + # 3a. Convert expected and actual ego networks from igraphs to indexed edges + expected_counts_with_networks_order_1$ego_networks <- + purrr::map( + expected_counts_with_networks_order_1$ego_networks, + graph_to_indexed_edges + ) + expected_counts_with_networks_order_2$ego_networks <- + purrr::map( + expected_counts_with_networks_order_2$ego_networks, + graph_to_indexed_edges + ) + actual_counts_with_networks_order_1$ego_networks <- + purrr::map( + actual_counts_with_networks_order_1$ego_networks, + graph_to_indexed_edges + ) + actual_counts_with_networks_order_2$ego_networks <- + purrr::map( + actual_counts_with_networks_order_2$ego_networks, + graph_to_indexed_edges + ) + # 3b. Do comparison + expect_equal( + actual_counts_with_networks_order_1, + expected_counts_with_networks_order_1 ) - actual_counts_with_networks_order_2$ego_networks <- - purrr::map( - actual_counts_with_networks_order_2$ego_networks, - graph_to_indexed_edges + expect_equal( + actual_counts_with_networks_order_2, + expected_counts_with_networks_order_2 ) - # 3b. 
Do comparison - expect_equal( - actual_counts_with_networks_order_1, - expected_counts_with_networks_order_1 - ) - expect_equal( - actual_counts_with_networks_order_2, - expected_counts_with_networks_order_2 - ) -}) + } +) context("ORCA interface: Ego-network graphlet counts") -test_that("ego_to_graphlet_counts: Ego-network 4-node graphlet counts match manually verified totals for test graph", { - # Set up a small sample network with at least one ego-network that contains - # at least one of each graphlets - elist <- rbind( - c("n1", "n2"), - c("n2", "n3"), - c("n1", "n4"), - c("n2", "n5"), - c("n1", "n6"), - c("n1", "n7"), - c("n2", "n4"), - c("n4", "n6"), - c("n6", "n8"), - c("n7", "n8"), - c("n7", "n9"), - c("n7", "n10"), - c("n8", "n9"), - c("n8", "n10"), - c("n9", "n10") - ) - graph <- igraph::graph_from_edgelist(elist, directed = FALSE) - - # Set node and graphlet labels to use for row and col names in expected counts - node_labels <- igraph::V(graph)$name - graphlet_labels <- c("N", "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") +test_that( + paste( + "ego_to_graphlet_counts: Ego-network 4-node graphlet counts match manually", + "verified totals for test graph" + ), + { + # Set up a small sample network with at least one ego-network that contains + # at least one of each graphlets + elist <- rbind( + c("n1", "n2"), + c("n2", "n3"), + c("n1", "n4"), + c("n2", "n5"), + c("n1", "n6"), + c("n1", "n7"), + c("n2", "n4"), + c("n4", "n6"), + c("n6", "n8"), + c("n7", "n8"), + c("n7", "n9"), + c("n7", "n10"), + c("n8", "n9"), + c("n8", "n10"), + c("n9", "n10") + ) + graph <- igraph::graph_from_edgelist(elist, directed = FALSE) - max_graphlet_size <- 4 - graphlet_key <- graphlet_key(max_graphlet_size) - k <- graphlet_key$node_count - # Set manually verified counts - # 1-step ego networks - expected_counts_order_1 <- rbind( - c(5, 6, 5, 2, 0, 1, 0, 2, 1, 0), - c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), - c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), - c(4, 5, 2, 2, 0, 0, 0, 0, 1, 0), - 
c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), - c(4, 4, 2, 1, 0, 0, 0, 1, 0, 0), - c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), - c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), - c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1), - c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1) - ) - rownames(expected_counts_order_1) <- node_labels - colnames(expected_counts_order_1) <- graphlet_labels - # 2-step ego networks - expected_counts_order_2 <- rbind( - c(10, 15, 18, 6, 21, 3, 1, 11, 1, 1), - c(7, 8, 10, 2, 6, 3, 0, 4, 1, 0), - c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), - c(8, 10, 14, 2, 11, 3, 1, 5, 1, 0), - c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), - c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), - c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), - c(7, 11, 10, 5, 10, 0, 1, 8, 0, 1), - c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1), - c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1) - ) - rownames(expected_counts_order_2) <- node_labels - colnames(expected_counts_order_2) <- graphlet_labels + # Set node and graphlet labels to use for row and col names in expected + # counts + node_labels <- igraph::V(graph)$name + graphlet_labels <- c( + "N", "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8" + ) - # Count graphlets in each ego network of the graph with only counts requested - min_ego_nodes <- 0 - min_ego_edges <- 0 + max_graphlet_size <- 4 + graphlet_key <- graphlet_key(max_graphlet_size) + k <- graphlet_key$node_count + # Set manually verified counts + # 1-step ego networks + expected_counts_order_1 <- rbind( + c(5, 6, 5, 2, 0, 1, 0, 2, 1, 0), + c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), + c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), + c(4, 5, 2, 2, 0, 0, 0, 0, 1, 0), + c(2, 1, 0, 0, 0, 0, 0, 0, 0, 0), + c(4, 4, 2, 1, 0, 0, 0, 1, 0, 0), + c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), + c(5, 7, 3, 4, 0, 0, 0, 3, 0, 1), + c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1), + c(4, 6, 0, 4, 0, 0, 0, 0, 0, 1) + ) + rownames(expected_counts_order_1) <- node_labels + colnames(expected_counts_order_1) <- graphlet_labels + # 2-step ego networks + expected_counts_order_2 <- rbind( + c(10, 15, 18, 6, 21, 3, 1, 11, 1, 1), + c(7, 8, 10, 2, 6, 3, 0, 4, 1, 0), + c(5, 5, 5, 1, 
0, 2, 0, 2, 0, 0), + c(8, 10, 14, 2, 11, 3, 1, 5, 1, 0), + c(5, 5, 5, 1, 0, 2, 0, 2, 0, 0), + c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), + c(8, 13, 13, 6, 15, 1, 1, 9, 1, 1), + c(7, 11, 10, 5, 10, 0, 1, 8, 0, 1), + c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1), + c(6, 9, 8, 4, 4, 0, 1, 6, 0, 1) + ) + rownames(expected_counts_order_2) <- node_labels + colnames(expected_counts_order_2) <- graphlet_labels - # Test that actual and returned ego graphlet counts match - # 1. Generate ego networks with previously tested function. - ego_networks_order_1 <- make_named_ego_graph(graph, - order = 1, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges - ) - ego_networks_order_2 <- make_named_ego_graph(graph, - order = 2, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges - ) + # Count graphlets in each ego network of the graph with only counts + # requested + min_ego_nodes <- 0 + min_ego_edges <- 0 - # 2. Calculate counts with ego_to_graphlet_counts. - actual_counts_order_1 <- - ego_to_graphlet_counts(ego_networks_order_1, - max_graphlet_size = max_graphlet_size + # Test that actual and returned ego graphlet counts match + # 1. Generate ego networks with previously tested function. + ego_networks_order_1 <- make_named_ego_graph(graph, + order = 1, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges ) - actual_counts_order_2 <- - ego_to_graphlet_counts(ego_networks_order_2, - max_graphlet_size = max_graphlet_size + ego_networks_order_2 <- make_named_ego_graph(graph, + order = 2, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges ) - # 3. Test that actual counts match expected - expect_equal(actual_counts_order_1, expected_counts_order_1) - expect_equal(actual_counts_order_2, expected_counts_order_2) -}) + # 2. Calculate counts with ego_to_graphlet_counts. 
+ actual_counts_order_1 <- + ego_to_graphlet_counts(ego_networks_order_1, + max_graphlet_size = max_graphlet_size + ) + actual_counts_order_2 <- + ego_to_graphlet_counts(ego_networks_order_2, + max_graphlet_size = max_graphlet_size + ) -# context("ORCA interface: Graphlet-based degree distributions") -# test_that("gdd works", { -# graph <- netdist::virusppi$EBV -# edges <- graph_to_indexed_edges(graph) -# # Caclulate expected outputs (NOTE: relies on orbit_to_graphlet_counts and -# # orca_counts_to_graphlet_orbit_degree_distribution methods) -# orbit_counts_4 <- orca::count4(edges) -# orbit_counts_5 <- orca::count5(edges) -# graphlet_counts_4 <- orbit_to_graphlet_counts(orbit_counts_4) -# graphlet_counts_5 <- orbit_to_graphlet_counts(orbit_counts_5) -# gdd_orbit_4_expected <- orca_counts_to_graphlet_orbit_degree_distribution(orbit_counts_4) -# gdd_orbit_5_expected <- orca_counts_to_graphlet_orbit_degree_distribution(orbit_counts_5) -# gdd_graphlet_4_expected <- orca_counts_to_graphlet_orbit_degree_distribution(graphlet_counts_4) -# gdd_graphlet_5_expected <- orca_counts_to_graphlet_orbit_degree_distribution(graphlet_counts_5) -# # Calculate actual outputs from code under test -# gdd_orbit_4_actual <- gdd(graph, feature_type = "orbit", max_graphlet_size = 4) -# gdd_orbit_5_actual <- gdd(graph, feature_type = "orbit", max_graphlet_size = 5) -# gdd_graphlet_4_actual <- gdd(graph, feature_type = "graphlet", max_graphlet_size = 4) -# gdd_graphlet_5_actual <- gdd(graph, feature_type = "graphlet", max_graphlet_size = 5) -# gdd_default_4_actual <- gdd(graph, max_graphlet_size = 4) -# gdd_default_5_actual <- gdd(graph, max_graphlet_size = 5) -# gdd_orbit_default_actual <- gdd(graph, feature_type = "orbit") -# gdd_graphlet_default_actual <- gdd(graph, feature_type = "graphlet") -# gdd_default_default_actual <- gdd(graph) -# # Compare actual gdd with expected gdd -# expect_equal(gdd_orbit_4_actual, gdd_orbit_4_expected) -# expect_equal(gdd_orbit_5_actual, 
gdd_orbit_5_expected) -# expect_equal(gdd_graphlet_4_actual, gdd_graphlet_4_expected) -# expect_equal(gdd_graphlet_5_actual, gdd_graphlet_5_expected) -# expect_equal(gdd_default_4_actual, gdd_orbit_4_expected) -# expect_equal(gdd_default_5_actual, gdd_orbit_5_expected) -# expect_equal(gdd_orbit_default_actual, gdd_orbit_4_expected) -# expect_equal(gdd_graphlet_default_actual, gdd_graphlet_4_expected) -# expect_equal(gdd_default_default_actual, gdd_orbit_4_expected) -# -# # Check gdd throws error for invalid feature type -# expect_error(gdd(graph, feature_type = "foo", max_graphlet_size = 4)) -# expect_error(gdd(graph, feature_type = "foo", max_graphlet_size = 5)) -# # Check gdd throws error for invalid maximum graphlet size -# expect_error(gdd(graph, feature_type = "orbit", max_graphlet_size = 2)) -# expect_error(gdd(graph, feature_type = "orbit", max_graphlet_size = 3)) -# expect_error(gdd(graph, feature_type = "orbit", max_graphlet_size = 6)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 2)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 3)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 6)) -# -# }) -# -# context("ORCA interface: Ego-network graphlet outputs for manually verified networks") -# test_that("Ego-network 4-node graphlet counts match manually verified totals -# and gdd gives expected discrete histograms",{ -# # Set up a small sample network with at least one ego-network that contains -# # at least one of each graphlets -# elist <- rbind( -# c("n1","n2"), -# c("n2","n3"), -# c("n1","n4"), -# c("n2","n5"), -# c("n1","n6"), -# c("n1","n7"), -# c("n2","n4"), -# c("n4","n6"), -# c("n6","n8"), -# c("n7","n8"), -# c("n7","n9"), -# c("n7","n10"), -# c("n8","n9"), -# c("n8","n10"), -# c("n9","n10") -# ) -# graph <- igraph::graph_from_edgelist(elist, directed = FALSE) -# -# # Set node and graphlet labels to use for row and col names in expected counts -# node_labels <- 
igraph::V(graph)$name -# graphlet_labels <- c("G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8") -# -# # Count graphlets in each ego network of the graph with neighbourhood sizes of 1 and 2 -# max_graphlet_size <- 4 -# actual_counts_order_1 <- -# count_graphlets_ego(graph, max_graphlet_size = max_graphlet_size, -# neighbourhood_size = 1) -# actual_counts_order_2 <- -# count_graphlets_ego(graph, max_graphlet_size = max_graphlet_size, -# neighbourhood_size = 2) -# -# # Set manually verified ego-network graphlet counts -# # 1-step ego networks -# expected_counts_order_1 <- rbind( -# c(6, 5, 2, 0, 1, 0, 2, 1, 0), -# c(5, 5, 1, 0, 2, 0, 2, 0, 0), -# c(1, 0, 0, 0, 0, 0, 0, 0, 0), -# c(5, 2, 2, 0, 0, 0, 0, 1, 0), -# c(1, 0, 0, 0, 0, 0, 0, 0, 0), -# c(4, 2, 1, 0, 0, 0, 1, 0, 0), -# c(7, 3, 4, 0, 0, 0, 3, 0, 1), -# c(7, 3, 4, 0, 0, 0, 3, 0, 1), -# c(6, 0, 4, 0, 0, 0, 0, 0, 1), -# c(6, 0, 4, 0, 0, 0, 0, 0, 1) -# ) -# rownames(expected_counts_order_1) <- node_labels -# colnames(expected_counts_order_1) <- graphlet_labels -# # 2-step ego networks -# expected_counts_order_2 <- rbind( -# c(15, 18, 6, 21, 3, 1, 11, 1, 1), -# c( 8, 10, 2, 6, 3, 0, 4, 1, 0), -# c( 5, 5, 1, 0, 2, 0, 2, 0, 0), -# c(10, 14, 2, 11, 3, 1, 5, 1, 0), -# c( 5, 5, 1, 0, 2, 0, 2, 0, 0), -# c(13, 13, 6, 15, 1, 1, 9, 1, 1), -# c(13, 13, 6, 15, 1, 1, 9, 1, 1), -# c(11, 10, 5, 10 ,0 ,1, 8, 0, 1), -# c( 9, 8, 4, 4, 0, 1, 6, 0, 1), -# c( 9, 8, 4, 4, 0, 1, 6, 0, 1) -# ) -# rownames(expected_counts_order_2) <- node_labels -# colnames(expected_counts_order_2) <- graphlet_labels -# -# # Test that actual counts match expected with only counts requested (default) -# expect_equal(actual_counts_order_1, expected_counts_order_1) -# expect_equal(actual_counts_order_2, expected_counts_order_2) -# -# # Test that actual counts and returned ego networks match expected -# # 1. 
Define expected -# expected_ego_networks_order_1 <- make_named_ego_graph(graph, order = 1) -# expected_ego_networks_order_2 <- make_named_ego_graph(graph, order = 2) -# expected_counts_with_networks_order_1 <- -# list(graphlet_counts = expected_counts_order_1, -# ego_networks = expected_ego_networks_order_1) -# expected_counts_with_networks_order_2 <- -# list(graphlet_counts = expected_counts_order_2, -# ego_networks = expected_ego_networks_order_2) -# # 2. Calculate actual -# actual_counts_with_networks_order_1 <- -# count_graphlets_ego(graph, max_graphlet_size = max_graphlet_size, -# neighbourhood_size = 1, return_ego_networks = TRUE) -# actual_counts_with_networks_order_2 <- -# count_graphlets_ego(graph, max_graphlet_size = max_graphlet_size, -# neighbourhood_size = 2, return_ego_networks = TRUE) -# # 3. Compare -# # Comparison is not implemented for igraph objects, so convert all igraphs to -# # indexed edge list and then compare. Do in-situ replacement of igraphs with -# # indexed edge lists to ensure we are checking full properties of returned -# # objects (i.e. named lists with matching elements). -# # 3a. Convert expected and actual ego networks from igraphs to indexed edges -# expected_counts_with_networks_order_1$ego_networks <- -# purrr::map(expected_counts_with_networks_order_1$ego_networks, -# graph_to_indexed_edges) -# expected_counts_with_networks_order_2$ego_networks <- -# purrr::map(expected_counts_with_networks_order_2$ego_networks, -# graph_to_indexed_edges) -# actual_counts_with_networks_order_1$ego_networks <- -# purrr::map(actual_counts_with_networks_order_1$ego_networks, -# graph_to_indexed_edges) -# actual_counts_with_networks_order_2$ego_networks <- -# purrr::map(actual_counts_with_networks_order_2$ego_networks, -# graph_to_indexed_edges) -# # 3b. 
Do comparison -# expect_equal(actual_counts_with_networks_order_1, -# expected_counts_with_networks_order_1) -# expect_equal(actual_counts_with_networks_order_2, -# expected_counts_with_networks_order_2) -# -# # Test that gdd method gives the expected graphlet degree distributions -# # 1-step ego-networks -# actual_gdd_order_1 <- gdd(graph, feature_type = "graphlet", -# max_graphlet_size = 4, ego_neighbourhood_size = 1) -# expected_gdd_order_1 <- list( -# G0 = dhist(locations = c(1, 4, 5, 6, 7), masses = c(2, 1, 2, 3, 2)), -# G1 = dhist(locations = c(0, 2, 3, 5), masses = c(4, 2, 2, 2)), -# G2 = dhist(locations = c(0, 1, 2, 4), masses = c(2, 2, 2, 4)), -# G3 = dhist(locations = c(0), masses = c(10)), -# G4 = dhist(locations = c(0, 1, 2), masses = c(8, 1, 1)), -# G5 = dhist(locations = c(0), masses = c(10)), -# G6 = dhist(locations = c(0, 1, 2, 3), masses = c(5, 1, 2, 2)), -# G7 = dhist(locations = c(0, 1), masses = c(8, 2)), -# G8 = dhist(locations = c(0, 1), masses = c(6, 4)) -# ) -# expect_equal(actual_gdd_order_1, expected_gdd_order_1) -# # 2-step ego-networks -# actual_gdd_order_2 <- gdd(graph, feature_type = "graphlet", -# max_graphlet_size = 4, ego_neighbourhood_size = 2) -# expected_gdd_order_2 <- list( -# G0 = dhist(locations = c(5, 8, 9, 10, 11, 13, 15), masses = c(2, 1, 2, 1, 1, 2, 1)), -# G1 = dhist(locations = c(5, 8, 10, 13, 14, 18), masses = c(2, 2, 2, 2, 1, 1)), -# G2 = dhist(locations = c(1, 2, 4, 5, 6), masses = c(2, 2, 2, 1, 3)), -# G3 = dhist(locations = c(0, 4, 6, 10, 11, 15, 21), masses = c(2, 2, 1, 1, 1, 2, 1)), -# G4 = dhist(locations = c(0, 1, 2, 3), masses = c(3, 2, 2, 3)), -# G5 = dhist(locations = c(0, 1), masses = c(3, 7)), -# G6 = dhist(locations = c(2, 4, 5, 6, 8, 9, 11), masses = c(2, 1, 1, 2, 1, 2, 1)), -# G7 = dhist(locations = c(0, 1), masses = c(5, 5)), -# G8 = dhist(locations = c(0, 1), masses = c(4, 6)) -# ) -# expect_equal(actual_gdd_order_2, expected_gdd_order_2) -# -# # Check gdd throws error for invalid feature type -# 
expect_error(gdd(graph, feature_type = "foo", max_graphlet_size = 4, -# ego_neighbourhood_size = 0)) -# expect_error(gdd(graph, feature_type = "foo", max_graphlet_size = 4, -# ego_neighbourhood_size = 1)) -# expect_error(gdd(graph, feature_type = "foo", max_graphlet_size = 5, -# ego_neighbourhood_size = 0)) -# expect_error(gdd(graph, feature_type = "foo", max_graphlet_size = 5, -# ego_neighbourhood_size = 1)) -# # We don't support orbit feature type for ego networks (i.e. neighbourhood > 0) -# expect_error(gdd(graph, feature_type = "orbit", max_graphlet_size = 4, -# ego_neighbourhood_size = 1)) -# expect_error(gdd(graph, feature_type = "orbit", max_graphlet_size = 5, -# ego_neighbourhood_size = 1)) -# # Check gdd throws error for invalid maximum graphlet size -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 2, -# ego_neighbourhood_size = 0)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 2, -# ego_neighbourhood_size = 1)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 3, -# ego_neighbourhood_size = 0)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 3, -# ego_neighbourhood_size = 1)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 6, -# ego_neighbourhood_size = 0)) -# expect_error(gdd(graph, feature_type = "graphlet", max_graphlet_size = 6, -# ego_neighbourhood_size = 1)) -# }) -# -# context("ORCA interface: GDD for all graphs in a directory") -# test_that("gdd_for_all_graphs works", { -# # Set source directory and file properties for Virus PPI graph edge files -# source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") -# edge_format = "ncol" -# file_pattern = ".txt" -# -# # Set number of threads to use at once for parallel processing. 
-# num_threads = getOption("mc.cores", 2L) -# -# # Use previously tested gdd code to calculate expected gdds -# expected_gdd_fn <- function(feature_type, max_graphlet_size, ego_neighbourhood_size) { -# gdds <- list( -# gdd(virusppi$EBV, feature_type, max_graphlet_size, ego_neighbourhood_size), -# gdd(virusppi$ECL, feature_type, max_graphlet_size, ego_neighbourhood_size), -# gdd(virusppi$HSV, feature_type, max_graphlet_size, ego_neighbourhood_size), -# gdd(virusppi$KSHV, feature_type, max_graphlet_size, ego_neighbourhood_size), -# gdd(virusppi$VZV, feature_type, max_graphlet_size, ego_neighbourhood_size) -# ) -# names(gdds) <- c("EBV", "ECL", "HSV-1", "KSHV", "VZV") -# gdds -# } -# -# # Use code under test to generate actual gdds -# actual_gdd_fn <- function (feature_type, max_graphlet_size, ego_neighbourhood_size) { -# gdd_for_all_graphs(source_dir = source_dir, format = edge_format, -# pattern = file_pattern, feature_type = feature_type, -# max_graphlet_size = max_graphlet_size, -# ego_neighbourhood_size = ego_neighbourhood_size, -# mc.cores = num_threads) -# } -# # Helper function to make comparison code clearer -# compare_fn <- function(feature_type, max_graphlet_size, ego_neighbourhood_size) { -# actual_gdds <- actual_gdd_fn(feature_type, max_graphlet_size, ego_neighbourhood_size) -# expected_gdds <- expected_gdd_fn(feature_type, max_graphlet_size, ego_neighbourhood_size) -# expect_equal(actual_gdds, expected_gdds) -# } -# # Map over test parameters, comparing actual gdds to expected -# # No ego-networks -# compare_fn(feature_type = "orbit", max_graphlet_size = 4, ego_neighbourhood_size = 0) -# compare_fn(feature_type = "orbit", max_graphlet_size = 5, ego_neighbourhood_size = 0) -# compare_fn(feature_type = "graphlet", max_graphlet_size = 4, ego_neighbourhood_size = 0) -# compare_fn(feature_type = "graphlet", max_graphlet_size = 5, ego_neighbourhood_size = 0) -# # Ego networks of order 1 -# compare_fn(feature_type = "graphlet", max_graphlet_size = 4, 
ego_neighbourhood_size = 1) -# compare_fn(feature_type = "graphlet", max_graphlet_size = 5, ego_neighbourhood_size = 1) -# # Ego networks of order 2 -# compare_fn(feature_type = "graphlet", max_graphlet_size = 4, ego_neighbourhood_size = 2) -# compare_fn(feature_type = "graphlet", max_graphlet_size = 5, ego_neighbourhood_size = 2) -# }) + # 3. Test that actual counts match expected + expect_equal(actual_counts_order_1, expected_counts_order_1) + expect_equal(actual_counts_order_2, expected_counts_order_2) + } +) diff --git a/tests/testthat/test_utility_functions.R b/tests/testthat/test_utility_functions.R index 091e02af..370e6e8b 100755 --- a/tests/testthat/test_utility_functions.R +++ b/tests/testthat/test_utility_functions.R @@ -2,63 +2,171 @@ context("Utility Functions") test_that("rotl_vec rotates left by specified number of places", { test_vec <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) - expect_equal(rotl_vec(test_vec, -13), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70)) - expect_equal(rotl_vec(test_vec, -12), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80)) - expect_equal(rotl_vec(test_vec, -11), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90)) - expect_equal(rotl_vec(test_vec, -10), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100)) - expect_equal(rotl_vec(test_vec, -9), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10)) - expect_equal(rotl_vec(test_vec, -8), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20)) - expect_equal(rotl_vec(test_vec, -7), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30)) - expect_equal(rotl_vec(test_vec, -6), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40)) - expect_equal(rotl_vec(test_vec, -5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50)) - expect_equal(rotl_vec(test_vec, -4), c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60)) - expect_equal(rotl_vec(test_vec, -3), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70)) - expect_equal(rotl_vec(test_vec, -2), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80)) - expect_equal(rotl_vec(test_vec, -1), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90)) - 
expect_equal(rotl_vec(test_vec, 0), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100)) - expect_equal(rotl_vec(test_vec, 1), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10)) - expect_equal(rotl_vec(test_vec, 2), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20)) - expect_equal(rotl_vec(test_vec, 3), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30)) - expect_equal(rotl_vec(test_vec, 4), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40)) - expect_equal(rotl_vec(test_vec, 5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50)) - expect_equal(rotl_vec(test_vec, 6), c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60)) - expect_equal(rotl_vec(test_vec, 7), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70)) - expect_equal(rotl_vec(test_vec, 8), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80)) - expect_equal(rotl_vec(test_vec, 9), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90)) - expect_equal(rotl_vec(test_vec, 10), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100)) - expect_equal(rotl_vec(test_vec, 11), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10)) - expect_equal(rotl_vec(test_vec, 12), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20)) - expect_equal(rotl_vec(test_vec, 13), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30)) + expect_equal( + rotl_vec(test_vec, -13), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70) + ) + expect_equal(rotl_vec( + test_vec, -12 + ), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80)) + expect_equal( + rotl_vec(test_vec, -11), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90) + ) + expect_equal( + rotl_vec(test_vec, -10), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) + ) + expect_equal( + rotl_vec(test_vec, -9), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10) + ) + expect_equal( + rotl_vec(test_vec, -8), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20) + ) + expect_equal( + rotl_vec(test_vec, -7), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30) + ) + expect_equal( + rotl_vec(test_vec, -6), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40) + ) + expect_equal( + rotl_vec(test_vec, -5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50) + ) + expect_equal( + rotl_vec(test_vec, -4), 
c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60) + ) + expect_equal( + rotl_vec(test_vec, -3), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70) + ) + expect_equal( + rotl_vec(test_vec, -2), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80) + ) + expect_equal( + rotl_vec(test_vec, -1), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90) + ) + expect_equal( + rotl_vec(test_vec, 0), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) + ) + expect_equal( + rotl_vec(test_vec, 1), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10) + ) + expect_equal( + rotl_vec(test_vec, 2), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20) + ) + expect_equal( + rotl_vec(test_vec, 3), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30) + ) + expect_equal( + rotl_vec(test_vec, 4), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40) + ) + expect_equal( + rotl_vec(test_vec, 5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50) + ) + expect_equal( + rotl_vec(test_vec, 6), c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60) + ) + expect_equal( + rotl_vec(test_vec, 7), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70) + ) + expect_equal( + rotl_vec(test_vec, 8), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80) + ) + expect_equal( + rotl_vec(test_vec, 9), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90) + ) + expect_equal( + rotl_vec(test_vec, 10), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) + ) + expect_equal( + rotl_vec(test_vec, 11), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10) + ) + expect_equal( + rotl_vec(test_vec, 12), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20) + ) + expect_equal( + rotl_vec(test_vec, 13), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30) + ) }) test_that("rotr_vec rotates right by specified number of places", { test_vec <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) - expect_equal(rotr_vec(test_vec, 13), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70)) - expect_equal(rotr_vec(test_vec, 12), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80)) - expect_equal(rotr_vec(test_vec, 11), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90)) - expect_equal(rotr_vec(test_vec, 10), c(10, 20, 30, 40, 50, 60, 70, 80, 
90, 100)) - expect_equal(rotr_vec(test_vec, 9), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10)) - expect_equal(rotr_vec(test_vec, 8), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20)) - expect_equal(rotr_vec(test_vec, 7), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30)) - expect_equal(rotr_vec(test_vec, 6), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40)) - expect_equal(rotr_vec(test_vec, 5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50)) - expect_equal(rotr_vec(test_vec, 4), c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60)) - expect_equal(rotr_vec(test_vec, 3), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70)) - expect_equal(rotr_vec(test_vec, 2), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80)) - expect_equal(rotr_vec(test_vec, 1), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90)) - expect_equal(rotr_vec(test_vec, 0), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100)) - expect_equal(rotr_vec(test_vec, -1), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10)) - expect_equal(rotr_vec(test_vec, -2), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20)) - expect_equal(rotr_vec(test_vec, -3), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30)) - expect_equal(rotr_vec(test_vec, -4), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40)) - expect_equal(rotr_vec(test_vec, -5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50)) - expect_equal(rotr_vec(test_vec, -6), c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60)) - expect_equal(rotr_vec(test_vec, -7), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70)) - expect_equal(rotr_vec(test_vec, -8), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80)) - expect_equal(rotr_vec(test_vec, -9), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90)) - expect_equal(rotr_vec(test_vec, -10), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100)) - expect_equal(rotr_vec(test_vec, -11), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10)) - expect_equal(rotr_vec(test_vec, -12), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20)) - expect_equal(rotr_vec(test_vec, -13), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30)) + expect_equal( + rotr_vec(test_vec, 13), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70) + 
) + expect_equal( + rotr_vec(test_vec, 12), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80) + ) + expect_equal( + rotr_vec(test_vec, 11), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90) + ) + expect_equal( + rotr_vec(test_vec, 10), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) + ) + expect_equal( + rotr_vec(test_vec, 9), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10) + ) + expect_equal( + rotr_vec(test_vec, 8), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20) + ) + expect_equal( + rotr_vec(test_vec, 7), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30) + ) + expect_equal( + rotr_vec(test_vec, 6), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40) + ) + expect_equal( + rotr_vec(test_vec, 5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50) + ) + expect_equal( + rotr_vec(test_vec, 4), c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60) + ) + expect_equal( + rotr_vec(test_vec, 3), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70) + ) + expect_equal( + rotr_vec(test_vec, 2), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80) + ) + expect_equal( + rotr_vec(test_vec, 1), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90) + ) + expect_equal( + rotr_vec(test_vec, 0), c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) + ) + expect_equal( + rotr_vec(test_vec, -1), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10) + ) + expect_equal( + rotr_vec(test_vec, -2), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20) + ) + expect_equal( + rotr_vec(test_vec, -3), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30) + ) + expect_equal( + rotr_vec(test_vec, -4), c(50, 60, 70, 80, 90, 100, 10, 20, 30, 40) + ) + expect_equal( + rotr_vec(test_vec, -5), c(60, 70, 80, 90, 100, 10, 20, 30, 40, 50) + ) + expect_equal( + rotr_vec(test_vec, -6), c(70, 80, 90, 100, 10, 20, 30, 40, 50, 60) + ) + expect_equal( + rotr_vec(test_vec, -7), c(80, 90, 100, 10, 20, 30, 40, 50, 60, 70) + ) + expect_equal( + rotr_vec(test_vec, -8), c(90, 100, 10, 20, 30, 40, 50, 60, 70, 80) + ) + expect_equal( + rotr_vec(test_vec, -9), c(100, 10, 20, 30, 40, 50, 60, 70, 80, 90) + ) + expect_equal( + rotr_vec(test_vec, -10), c(10, 
20, 30, 40, 50, 60, 70, 80, 90, 100) + ) + expect_equal( + rotr_vec(test_vec, -11), c(20, 30, 40, 50, 60, 70, 80, 90, 100, 10) + ) + expect_equal( + rotr_vec(test_vec, -12), c(30, 40, 50, 60, 70, 80, 90, 100, 10, 20) + ) + expect_equal( + rotr_vec(test_vec, -13), c(40, 50, 60, 70, 80, 90, 100, 10, 20, 30) + ) }) From 9c0a716391e02f75990914fcd25ddda8f4880f33 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Tue, 7 Jun 2022 11:20:22 +0000 Subject: [PATCH 64/84] Update documentation --- man/count_graphlets_ego.Rd | 5 +- man/emd_cs.Rd | 6 +- man/gdd.Rd | 8 +- man/gdd_for_all_graphs.Rd | 10 +-- man/graph_features_to_histograms.Rd | 10 ++- man/graphlet_key.Rd | 3 +- man/min_emd.Rd | 4 +- man/min_emd_exhaustive.Rd | 8 +- man/netdis.plot.Rd | 17 +++-- man/netemd.plot.Rd | 17 +++-- man/netemd_many_to_many.Rd | 29 +++++--- man/netemd_one_to_one.Rd | 110 +++++++++++++++++++++------- man/netemd_single_pair.Rd | 29 +++++--- man/orbit_key.Rd | 3 +- man/read_simple_graphs.Rd | 13 +++- 15 files changed, 186 insertions(+), 86 deletions(-) diff --git a/man/count_graphlets_ego.Rd b/man/count_graphlets_ego.Rd index 5ae422c8..35afac8d 100644 --- a/man/count_graphlets_ego.Rd +++ b/man/count_graphlets_ego.Rd @@ -20,8 +20,9 @@ count_graphlets_ego( Only graphlets containing up to \code{max_graphlet_size} nodes will be counted. Currently only size 4 (default) and 5 are supported.} -\item{neighbourhood_size}{The number of steps from the source node used to select the -neighboring nodes to be included in the source node ego-network. (Default 2).} +\item{neighbourhood_size}{The number of steps from the source node used to +select the neighboring nodes to be included in the source node ego-network. +(Default 2).} \item{min_ego_nodes}{Only ego networks with at least \code{min_ego_nodes} nodes are returned. 
(Default 3).} diff --git a/man/emd_cs.Rd b/man/emd_cs.Rd index 1e580760..99a45be7 100755 --- a/man/emd_cs.Rd +++ b/man/emd_cs.Rd @@ -20,7 +20,7 @@ Distance between the two histograms by summing the absolute difference between the two cumulative histograms. } \references{ -Calculation of the Wasserstein Distance Between Probability Distributions on the Line -S. S. Vallender, Theory of Probability & Its Applications 1974 18:4, 784-786 -\url{http://dx.doi.org/10.1137/1118101} +Calculation of the Wasserstein Distance Between Probability Distributions on +the Line S. S. Vallender, Theory of Probability & Its Applications 1974 18:4, +784-786 \url{http://dx.doi.org/10.1137/1118101} } diff --git a/man/gdd.Rd b/man/gdd.Rd index 7444bbe7..3102d4b3 100644 --- a/man/gdd.Rd +++ b/man/gdd.Rd @@ -22,7 +22,8 @@ the number of graphlet orbits each node participates in.} Only graphlets containing up to \code{max_graphlet_size} nodes will be counted. Currently only size 4 and 5 are supported.} -\item{ego_neighbourhood_size}{The number of steps from the source node used to select the +\item{ego_neighbourhood_size}{The number of steps from the source node used +to select the neighboring nodes to be included in the source node ego-network.} } \value{ @@ -30,6 +31,7 @@ List of graphlet-based degree distributions, with each distribution represented as a \code{dhist} discrete histogram object. } \description{ -Short-cut function to create graphlet-based degree distributions from \code{igraph} graph object -using the ORCA fast graphlet orbit counting package. +Short-cut function to create graphlet-based degree distributions from +\code{igraph} graph object using the ORCA fast graphlet orbit counting +package. } diff --git a/man/gdd_for_all_graphs.Rd b/man/gdd_for_all_graphs.Rd index 7e6956bf..c9703631 100644 --- a/man/gdd_for_all_graphs.Rd +++ b/man/gdd_for_all_graphs.Rd @@ -25,12 +25,12 @@ gdd_for_all_graphs( \item{feature_type}{Type of graphlet-based degree distributions. 
Can be \code{graphlet} to count graphlets or \code{orbit} to count orbits.} -\item{max_graphlet_size}{Maximum size of graphlets to use when generating GDD. -Currently only size 4 and 5 are supported.} +\item{max_graphlet_size}{Maximum size of graphlets to use when generating +GDD. Currently only size 4 and 5 are supported.} -\item{ego_neighbourhood_size}{The number of steps from the source node used to select the -neighboring nodes to be included in the source node ego-network. If set to 0, ego-networks will not be -used.} +\item{ego_neighbourhood_size}{The number of steps from the source node used +to select the neighboring nodes to be included in the source node +ego-network. If set to 0, ego-networks will not be used.} \item{mc.cores}{Number of cores to use for parallel processing. Defaults to the \code{mc.cores} option set in the R environment.} diff --git a/man/graph_features_to_histograms.Rd b/man/graph_features_to_histograms.Rd index ae526218..816a847d 100644 --- a/man/graph_features_to_histograms.Rd +++ b/man/graph_features_to_histograms.Rd @@ -8,14 +8,16 @@ each feature.} graph_features_to_histograms(features_matrix) } \arguments{ -\item{features_matrix}{A matrix whose rows represent nodes and whose columns represent different node level features. This means that entry ij provides the value of feature j for node i.} +\item{features_matrix}{A matrix whose rows represent nodes and whose columns +represent different node level features. This means that entry ij provides +the value of feature j for node i.} } \value{ -Feature histograms: List of "discrete histograms" for each -feature +Feature histograms: List of "discrete histograms" for each feature } \description{ Converts a matrix of node level features (e.g. 
for example counts of multiple graphlets or orbits at each node) to -a set of histogram like objects (observed frequency distribution of each feature/column) +a set of histogram like objects (observed frequency distribution of each +feature/column) } diff --git a/man/graphlet_key.Rd b/man/graphlet_key.Rd index 23f2289b..b63c9315 100644 --- a/man/graphlet_key.Rd +++ b/man/graphlet_key.Rd @@ -7,7 +7,8 @@ graphlet_key(max_graphlet_size) } \arguments{ -\item{max_graphlet_size}{Maximum number of nodes graphlets can contain. Currently only size 2 to 5 are supported.} +\item{max_graphlet_size}{Maximum number of nodes graphlets can contain. +Currently only size 2 to 5 are supported.} } \value{ Metadata list with the following named fields: diff --git a/man/min_emd.Rd b/man/min_emd.Rd index 2799f48a..08cf27f7 100644 --- a/man/min_emd.Rd +++ b/man/min_emd.Rd @@ -15,8 +15,8 @@ min_emd(dhist1, dhist2, method = "optimise") offsets for each pair of histograms. Default is "optimise" to use R's built-in \code{stats::optimise} method to efficiently find the offset with the minimal EMD. However, this is not guaranteed to find the global -minimum if multiple local minima EMDs exist. You can alternatively specify the -"exhaustive" method, which will exhaustively evaluate the EMD between the +minimum if multiple local minima EMDs exist. You can alternatively specify +the "exhaustive" method, which will exhaustively evaluate the EMD between the histograms at all offsets that are candidates for the minimal EMD.} } \value{ diff --git a/man/min_emd_exhaustive.Rd b/man/min_emd_exhaustive.Rd index cd02830b..91b46ce3 100644 --- a/man/min_emd_exhaustive.Rd +++ b/man/min_emd_exhaustive.Rd @@ -25,8 +25,8 @@ to calculate the EMD at all offsets where any knots from the two ECMFs align to ensure that the offset with the global minimum EMD is found. This is because of the piece-wise linear nature of the two ECMFs. 
Between any -two offsets where knots from the two ECMFs align, EMD will be either constant, -or uniformly increasing or decreasing. Therefore, there the EMD between two -sets of aligned knots cannot be smaller than the EMD at one or other of the -bounding offsets. +two offsets where knots from the two ECMFs align, EMD will be either +constant, or uniformly increasing or decreasing. Therefore, there the EMD +between two sets of aligned knots cannot be smaller than the EMD at one or +other of the bounding offsets. } diff --git a/man/netdis.plot.Rd b/man/netdis.plot.Rd index 44cc33ed..c5bb8bd6 100644 --- a/man/netdis.plot.Rd +++ b/man/netdis.plot.Rd @@ -15,17 +15,24 @@ netdis.plot( \arguments{ \item{netdislist}{Default output of \code{netdis_many_to_many}.} -\item{whatrow}{Selection of the row in \code{netdis_many_to_many$comp_spec} to be used for plotting.} +\item{whatrow}{Selection of the row in \code{netdis_many_to_many$comp_spec} +to be used for plotting.} -\item{clustering_method}{Clustering method as allowed in the \code{pheatmap} function from the \code{pheatmap} package. The dendrogram will appear if \code{docluster} is TRUE (default).} +\item{clustering_method}{Clustering method as allowed in the \code{pheatmap} +function from the \code{pheatmap} package. The dendrogram will appear if +\code{docluster} is TRUE (default).} \item{main}{Title of the plot.} -\item{docluster}{controls the order of the rows and columns. If TRUE (default) the rows and columns will be reordered to create the dendrogram. If FALSE, then only the heatmap is drawn.} +\item{docluster}{controls the order of the rows and columns. If TRUE +(default) the rows and columns will be reordered to create the dendrogram. If +FALSE, then only the heatmap is drawn.} } \value{ -Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. +Provides a heatmap and dendrogram for the network comparisons via +\code{pheatmap}. 
} \description{ -Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. +Provides a heatmap and dendrogram for the network comparisons via +\code{pheatmap}. } diff --git a/man/netemd.plot.Rd b/man/netemd.plot.Rd index 269e8010..91177187 100644 --- a/man/netemd.plot.Rd +++ b/man/netemd.plot.Rd @@ -12,19 +12,26 @@ netemd.plot( ) } \arguments{ -\item{clustering_method}{Clustering method as allowed in the \code{pheatmap} function from the \code{pheatmap} package. The dendrogram will appear if \code{docluster} is TRUE (default).} +\item{clustering_method}{Clustering method as allowed in the \code{pheatmap} +function from the \code{pheatmap} package. The dendrogram will appear if +\code{docluster} is TRUE (default).} \item{main}{Title of the plot.} -\item{docluster}{controls the order of the rows and columns. If TRUE (default) the rows and columns will be reordered to create the dendrogram. If FALSE, then only the heatmap is drawn.} +\item{docluster}{controls the order of the rows and columns. If TRUE +(default) the rows and columns will be reordered to create the dendrogram. If +FALSE, then only the heatmap is drawn.} \item{netdislist}{Default output of \code{netdis_many_to_many}.} -\item{whatrow}{Selection of the row in \code{netdis_many_to_many$comp_spec} to be used for plotting.} +\item{whatrow}{Selection of the row in \code{netdis_many_to_many$comp_spec} +to be used for plotting.} } \value{ -Provides a heat map and dendrogram for the network comparisons via \code{pheatmap}. +Provides a heat map and dendrogram for the network comparisons via +\code{pheatmap}. } \description{ -Provides a heatmap and dendrogram for the network comparisons via \code{pheatmap}. +Provides a heatmap and dendrogram for the network comparisons via +\code{pheatmap}. 
} diff --git a/man/netemd_many_to_many.Rd b/man/netemd_many_to_many.Rd index 0e7e8da9..0ce1f608 100644 --- a/man/netemd_many_to_many.Rd +++ b/man/netemd_many_to_many.Rd @@ -18,16 +18,22 @@ netemd_many_to_many( ) } \arguments{ -\item{graphs}{A list of network/graph objects from the \code{igraph} package. \code{graphs} can be set to \code{NULL} (default) if \code{dhists} is provided.} +\item{graphs}{A list of network/graph objects from the \code{igraph} package. +\code{graphs} can be set to \code{NULL} (default) if \code{dhists} is +provided.} -\item{dhists}{A list whose elements contain either: A list of \code{dhist} discrete histogram objects for each graph, or a list a matrix of network features (each column representing a feature). \code{dhists} can be set to \code{NULL} (default) if \code{graphs} is provided. A \code{dhist} object can be obtained from \code{graph_features_to_histograms}.} +\item{dhists}{A list whose elements contain either: A list of \code{dhist} +discrete histogram objects for each graph, or a list a matrix of network +features (each column representing a feature). \code{dhists} can be set to +\code{NULL} (default) if \code{graphs} is provided. A \code{dhist} object +can be obtained from \code{graph_features_to_histograms}.} \item{method}{The method to use to find the minimum EMD across all potential offsets for each pair of histograms. Default is "optimise" to use R's built-in \code{stats::optimise} method to efficiently find the offset with the minimal EMD. However, this is not guaranteed to find the global -minimum if multiple local minima EMDs exist. You can alternatively specify the -"exhaustive" method, which will exhaustively evaluate the EMD between the +minimum if multiple local minima EMDs exist. 
You can alternatively specify +the "exhaustive" method, which will exhaustively evaluate the EMD between the histograms at all offsets that are candidates for the minimal EMD.} \item{smoothing_window_width}{Width of "top-hat" smoothing window to apply to @@ -43,15 +49,16 @@ minimal EMDs and associated offsets for all pairs of histograms} the \code{mc.cores} option set in the R environment.} \item{feature_type}{Type of graphlet-based feature to count: "graphlet" -counts the number of graphlets each node participates in; "orbit" (default) calculates -the number of graphlet orbits each node participates in.} +counts the number of graphlets each node participates in; "orbit" (default) +calculates the number of graphlet orbits each node participates in.} \item{max_graphlet_size}{Determines the maximum size of graphlets to count. Only graphlets containing up to \code{max_graphlet_size} nodes will be counted. Possible values are 4, and 5 (default).} \item{ego_neighbourhood_size}{The number of steps from the source node to -include nodes for each ego-network. NetEmd was proposed for individual nodes alone, hence the default value is 0.} +include nodes for each ego-network. NetEmd was proposed for individual nodes +alone, hence the default value is 0.} } \value{ NetEMD measures between all pairs of graphs for which features @@ -60,10 +67,10 @@ parameter. If set to FALSE, a list is returned with the following named elements:\code{net_emd}: a vector of NetEMDs for each pair of graphs, \code{comp_spec}: a comparison specification table containing the graph names and indices within the input GDD list for each pair of graphs compared. 
-If \code{return_details} is set to FALSE, the list also contains the following -matrices for each graph pair: \code{min_emds}: the minimal EMD for each GDD -used to compute the NetEMD, \code{min_offsets}: the associated offsets giving -the minimal EMD for each GDD +If \code{return_details} is set to FALSE, the list also contains the +following matrices for each graph pair: \code{min_emds}: the minimal EMD for +each GDD used to compute the NetEMD, \code{min_offsets}: the associated +offsets giving the minimal EMD for each GDD } \description{ NetEMDs between all graph pairs using provided Graphlet-based Degree diff --git a/man/netemd_one_to_one.Rd b/man/netemd_one_to_one.Rd index caebd1e7..7750c68b 100644 --- a/man/netemd_one_to_one.Rd +++ b/man/netemd_one_to_one.Rd @@ -18,21 +18,30 @@ netemd_one_to_one( ) } \arguments{ -\item{graph_1}{A network/graph object from the \code{igraph} package. \code{graph_1} can be set to \code{NULL} (default) if \code{dhists_1} is provided.} +\item{graph_1}{A network/graph object from the \code{igraph} package. +\code{graph_1} can be set to \code{NULL} (default) if \code{dhists_1} is +provided.} -\item{graph_2}{A network/graph object from the \code{igraph} package. \code{graph_2} can be set to \code{NULL} (default) if \code{dhists_2} is provided.} +\item{graph_2}{A network/graph object from the \code{igraph} package. +\code{graph_2} can be set to \code{NULL} (default) if \code{dhists_2} is +provided.} -\item{dhists_1}{Either, a \code{dhist} discrete histogram object, or list of such objects, or a matrix of network features (each column representing a feature). \code{dhists_1} can be set to \code{NULL} (default) if \code{graph_1} is provided. A \code{dhist} object can be obtained from \code{graph_features_to_histograms}.} +\item{dhists_1}{Either, a \code{dhist} discrete histogram object, or list of +such objects, or a matrix of network features (each column representing a +feature). 
\code{dhists_1} can be set to \code{NULL} (default) if +\code{graph_1} is provided. A \code{dhist} object can be obtained from +\code{graph_features_to_histograms}.} \item{dhists_2}{Same as \code{dhists_1}.} -\item{method}{The method to be used to find the minimum EMD across all potential -offsets for each pair of histograms. Default is "optimise" to use +\item{method}{The method to be used to find the minimum EMD across all +potential offsets for each pair of histograms. Default is "optimise" to use R's built-in \code{stats::optimise} method to efficiently find the offset with the minimal EMD. However, this is not guaranteed to find the global -minimum if multiple local minima EMDs exist. You can alternatively specify the -"exhaustive" method, which will exhaustively evaluate the EMD between the -histograms at all offsets that are candidates for the minimal EMD at the cost of computational time.} +minimum if multiple local minima EMDs exist. You can alternatively specify +the "exhaustive" method, which will exhaustively evaluate the EMD between the +histograms at all offsets that are candidates for the minimal EMD at the cost +of computational time.} \item{return_details}{Logical indicating whether to return the individual minimal EMDs and associated offsets for all pairs of histograms.} @@ -44,43 +53,59 @@ which results in no smoothing. Care should be taken to select a (e.g.for the integer domain a width of 1 is the natural choice).} \item{feature_type}{Type of graphlet-based feature to count: "graphlet" -counts the number of graphlets each node participates in; "orbit" (default) calculates -the number of graphlet orbits each node participates in.} +counts the number of graphlets each node participates in; "orbit" (default) +calculates the number of graphlet orbits each node participates in.} \item{max_graphlet_size}{Determines the maximum size of graphlets to count. Only graphlets containing up to \code{max_graphlet_size} nodes will be counted. 
Possible values are 4, and 5 (default).} \item{ego_neighbourhood_size}{The number of steps from the source node to -include nodes for each ego-network. NetEmd was proposed for individual nodes alone, hence the default value is 0.} +include nodes for each ego-network. NetEmd was proposed for individual nodes +alone, hence the default value is 0.} } \value{ -NetEMD measure for the two sets of discrete histograms (or graphs). If -(\code{return_details = FALSE}) then a list with the following named elements is returned -\code{net_emd}: the NetEMD for the set of histogram pairs (or graphs), \code{min_emds}: -the minimal EMD for each pair of histograms, \code{min_offsets}: the associated -offsets giving the minimal EMD for each pair of histograms +NetEMD measure for the two sets of discrete histograms (or graphs). +If (\code{return_details = FALSE}) then a list with the following named +elements is returned \code{net_emd}: the NetEMD for the set of histogram +pairs (or graphs), \code{min_emds}: the minimal EMD for each pair of +histograms, \code{min_offsets}: the associated offsets giving the minimal EMD +for each pair of histograms } \description{ Calculates the network Earth Mover's Distance (EMD) between -two sets of network features. This is done by individually normalising the distribution -of each feature so that they have unit mass and unit variance. Then the minimun EMD between the same pair of features (one for each corresponding graph) is calculated by considering all possible translations of the feature distributions. Finally the average over all features is reported. +two sets of network features. This is done by individually normalising the +distribution of each feature so that they have unit mass and unit variance. +Then the minimun EMD between the same pair of features (one for each +corresponding graph) is calculated by considering all possible translations +of the feature distributions. Finally the average over all features is +reported. 
This is calculated as follows: 1. Normalise each feature histogram to have unit mass and unit variance. - 2. For each feature, find the minimum EMD between each pair of histograms considering all possible histogram translations. + 2. For each feature, find the minimum EMD between each pair of histograms + considering all possible histogram translations. 3. Take the average minimum EMD across all features. } \examples{ require(igraph) graph_1 <- graph.lattice(c(8, 8)) graph_2 <- graph.lattice(c(44, 44)) -netemd_one_to_one(graph_1 = graph_1, graph_2 = graph_2, feature_type = "orbit", max_graphlet_size = 5) +netemd_one_to_one( + graph_1 = graph_1, + graph_2 = graph_2, + feature_type = "orbit", + max_graphlet_size = 5 +) # Providing a matrix of network features props_a <- count_orbits_per_node(graph = graph_1, max_graphlet_size = 5) props_b <- count_orbits_per_node(graph = graph_2, max_graphlet_size = 5) -netemd_one_to_one(dhists_1 = props_a, dhists_2 = props_b, smoothing_window_width = 1) +netemd_one_to_one( + dhists_1 = props_a, + dhists_2 = props_b, + smoothing_window_width = 1 +) # Providing the network features as lists of dhist objects dhists_1 <- graph_features_to_histograms(props_a) @@ -91,17 +116,46 @@ netemd_one_to_one(dhists_1 = dhists_1, dhists_2 = dhists_2) # A variation of NetEmd: Using the Laplacian spectrum # Laplacian -Lapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = FALSE, sparse = FALSE) -Lapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = FALSE, sparse = FALSE) +Lapg_1 <- igraph::laplacian_matrix( + graph = graph_1, + normalized = FALSE, + sparse = FALSE +) +Lapg_2 <- igraph::laplacian_matrix( + graph = graph_2, + normalized = FALSE, + sparse = FALSE +) # Normalized Laplacian -NLapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = TRUE, sparse = FALSE) -NLapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = TRUE, sparse = FALSE) +NLapg_1 <- igraph::laplacian_matrix( + graph = graph_1, + normalized = 
TRUE, + sparse = FALSE +) +NLapg_2 <- igraph::laplacian_matrix( + graph = graph_2, + normalized = TRUE, + sparse = FALSE +) # Spectra (This may take a couple of minutes). -props_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) -props_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) +props_1 <- cbind( + L.Spectra = eigen(Lapg_1)$values, + NL.Spectra = eigen(NLapg_1)$values +) +props_2 <- cbind( + L.Spectra = eigen(Lapg_2)$values, + NL.Spectra = eigen(NLapg_2)$values +) -netemd_one_to_one(dhists_1 = props_1, dhists_2 = props_2, smoothing_window_width = 0) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +# Use of smoothing window 1 is given for discrete integer distributions. If +# the network features are considered continuous variables +# smoothing_window_width equal to zero is recommended. +netemd_one_to_one( + dhists_1 = props_1, + dhists_2 = props_2, + smoothing_window_width = 0 +) } diff --git a/man/netemd_single_pair.Rd b/man/netemd_single_pair.Rd index 5cf3b3ed..0c805821 100644 --- a/man/netemd_single_pair.Rd +++ b/man/netemd_single_pair.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/measures_net_emd.R \name{netemd_single_pair} \alias{netemd_single_pair} -\title{Internal function to compute the minimum Earth Mover's Distance between standarized and translated histograms} +\title{Internal function to compute the minimum Earth Mover's Distance between +standarized and translated histograms} \usage{ netemd_single_pair( dhist1, @@ -16,8 +17,8 @@ netemd_single_pair( offsets for each pair of histograms. Default is "optimise" to use R's built-in \code{stats::optimise} method to efficiently find the offset with the minimal EMD. However, this is not guaranteed to find the global -minimum if multiple local minima EMDs exist. 
You can alternatively specify the -"exhaustive" method, which will exhaustively evaluate the EMD between the +minimum if multiple local minima EMDs exist. You can alternatively specify +the "exhaustive" method, which will exhaustively evaluate the EMD between the histograms at all offsets that are candidates for the minimal EMD.} \item{smoothing_window_width}{Width of "top-hat" smoothing window to apply to @@ -26,18 +27,23 @@ which results in no smoothing. Care should be taken to select a \code{smoothing_window_width} that is appropriate for the discrete domain (e.g.for the integer domain a width of 1 is the natural choice)} -\item{dhists_1}{A \code{dhist} discrete histogram object or a list of such objects} +\item{dhists_1}{A \code{dhist} discrete histogram object or a list of such +objects} -\item{dhists_2}{A \code{dhist} discrete histogram object or a list of such objects} +\item{dhists_2}{A \code{dhist} discrete histogram object or a list of such +objects} } \value{ A list with the following named elements -\code{net_emd}: the NetEMD for the set of histogram pairs, \code{min_offsets}: the associated -offsets giving the minimal EMD for each pair of histograms and \code{min_offset_std}: Offset used in the standardised histograms. +\code{net_emd}: the NetEMD for the set of histogram pairs, +\code{min_offsets}: the associated offsets giving the minimal EMD for each +pair of histograms and \code{min_offset_std}: Offset used in the standardised +histograms. } \description{ Calculates the minimum Earth Mover's Distance (EMD) between two -discrete histograms after normalising each histogram to unit mass and variance. +discrete histograms after normalising each histogram to unit mass and +variance. This is calculated as follows: 1. Normalise each histogram to have unit mass and unit variance 2. 
Find the minimum EMD between the histograms @@ -51,5 +57,10 @@ props_2 <- count_orbits_per_node(graph = goldstd_2, max_graphlet_size = 5) dhists_1 <- graph_features_to_histograms(props_1) dhists_2 <- graph_features_to_histograms(props_2) # Obtain the minimum NetEMD_edges between the histograms -netemd_single_pair(dhists_1[[1]], dhists_2[[1]], method = "optimise", smoothing_window_width = 0) +netemd_single_pair( + dhists_1[[1]], + dhists_2[[1]], + method = "optimise", + smoothing_window_width = 0 +) } diff --git a/man/orbit_key.Rd b/man/orbit_key.Rd index 79d8afb4..e7921d92 100644 --- a/man/orbit_key.Rd +++ b/man/orbit_key.Rd @@ -7,7 +7,8 @@ orbit_key(max_graphlet_size) } \arguments{ -\item{max_graphlet_size}{Maximum number of nodes graphlets can contain. Currently only size 2 to 5 are supported.} +\item{max_graphlet_size}{Maximum number of nodes graphlets can contain. +Currently only size 2 to 5 are supported.} } \value{ Metadata list with the following named fields: diff --git a/man/read_simple_graphs.Rd b/man/read_simple_graphs.Rd index 95416894..5a4addc9 100644 --- a/man/read_simple_graphs.Rd +++ b/man/read_simple_graphs.Rd @@ -50,10 +50,17 @@ following order: previous alterations) } \examples{ -# Set source directory for Virus protein-protein interaction edge files stored in the netdist package. -source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") +# Set source directory for Virus protein-protein interaction edge files +# stored in the netdist package. 
+source_dir <- system.file( + file.path("extdata", "VRPINS"), + package = "netdist" +) print(source_dir) # Load query graphs as igraph objects -graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") +graph_1 <- read_simple_graph( + file.path(source_dir, "EBV.txt"), + format = "ncol" +) graph_1 } From 681106cb34c81130038f3a5831e24e840fa8936e Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Tue, 7 Jun 2022 12:30:16 +0100 Subject: [PATCH 65/84] styler vignettes --- .github/workflows/quality.yaml | 2 +- vignettes/ManyToMany.Rmd | 84 +++++++------- vignettes/NetEmdTimeOrdering.Rmd | 54 ++++----- vignettes/NetdisGPStepByStep.Rmd | 126 ++++++++++++--------- vignettes/NetdisStepByStep.Rmd | 128 +++++++++++++--------- vignettes/NewNetdisCustomisations.Rmd | 128 ++++++++++++---------- vignettes/PreComputedProps.Rmd | 90 +++++++-------- vignettes/default_pairwise_usage.Rmd | 79 ++++++------- vignettes/dendrogram_example_net_dis.Rmd | 95 ++++++++-------- vignettes/dendrogram_example_net_emd.Rmd | 65 +++++------ vignettes/netdis_customisations.Rmd | 74 +++++++------ vignettes/netdis_pairwise_comparisons.Rmd | 47 ++++---- vignettes/quickstart_netdis_2graphs.Rmd | 108 ++++++++++-------- 13 files changed, 585 insertions(+), 495 deletions(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 7a53a048..8a29ff60 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -28,7 +28,7 @@ jobs: any::styler - name: Styler - run: styler::style_pkg(dry='fail') + run: styler::style_pkg(filetype = c('R', 'Rprofile', 'Rmd'), dry='fail') shell: Rscript {0} - name: Lintr diff --git a/vignettes/ManyToMany.Rmd b/vignettes/ManyToMany.Rmd index 722a0c0b..89dc2ea5 100644 --- a/vignettes/ManyToMany.Rmd +++ b/vignettes/ManyToMany.Rmd @@ -12,10 +12,10 @@ editor_options: --- ```{r, include = FALSE} - knitr::opts_chunk$set( +knitr::opts_chunk$set( collapse = TRUE, comment = "#>" - ) +) ``` # Introduction @@ -46,20 +46,20 
@@ Generation of regular grid, ring and tree-like networks with 400 nodes and 1600 ```{r, netwokrs,fig.align='center',fig.dim=c(8,4)} # Create networks set.seed(3171) -gLat_1 <- igraph::graph.lattice(c(20,20)) -gLat_2 <- igraph::graph.lattice(c(40,40)) -gRing_1 <- igraph::make_ring(20^2) +gLat_1 <- igraph::graph.lattice(c(20, 20)) +gLat_2 <- igraph::graph.lattice(c(40, 40)) +gRing_1 <- igraph::make_ring(20^2) gRing_2 <- igraph::make_ring(40^2) -gTree_1 <- igraph::as.undirected( make_tree(n = 20^2,children = 3) ) -gTree_2 <- igraph::as.undirected( make_tree(n = 40^2,children = 3) ) +gTree_1 <- igraph::as.undirected(make_tree(n = 20^2, children = 3)) +gTree_2 <- igraph::as.undirected(make_tree(n = 40^2, children = 3)) -par(mfrow=c(1,2)) -plot(gLat_1,vertex.size=0.8,vertex.label=NA) -plot(gLat_2,vertex.size=0.8,vertex.label=NA) -plot(gRing_1,vertex.size=0.8,vertex.label=NA) -plot(gRing_2,vertex.size=0.8,vertex.label=NA) -plot(gTree_1,vertex.size=0.8,vertex.label=NA) -plot(gTree_2,vertex.size=0.8,vertex.label=NA) +par(mfrow = c(1, 2)) +plot(gLat_1, vertex.size = 0.8, vertex.label = NA) +plot(gLat_2, vertex.size = 0.8, vertex.label = NA) +plot(gRing_1, vertex.size = 0.8, vertex.label = NA) +plot(gRing_2, vertex.size = 0.8, vertex.label = NA) +plot(gTree_1, vertex.size = 0.8, vertex.label = NA) +plot(gTree_2, vertex.size = 0.8, vertex.label = NA) ``` ## NetEmd using subgraph counts @@ -67,9 +67,9 @@ plot(gTree_2,vertex.size=0.8,vertex.label=NA) Subgraph count based NetEmd comparisons: ```{r, netemdorbits,fig.align='center'} # NetEMD using subgraph counts. -glist <- list(Lat_1=gLat_1, Lat_2=gLat_2, Ring_1=gRing_1, Ring_2=gRing_1, Tree_1=gTree_1, Tree_2=gTree_2) +glist <- list(Lat_1 = gLat_1, Lat_2 = gLat_2, Ring_1 = gRing_1, Ring_2 = gRing_1, Tree_1 = gTree_1, Tree_2 = gTree_2) -netemdlist <- netemd_many_to_many(graphs = glist,smoothing_window_width = 1,mc.cores = 1) #Use of smoothing window 1 is given for discrete integer distributions. 
If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +netemdlist <- netemd_many_to_many(graphs = glist, smoothing_window_width = 1, mc.cores = 1) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. netemdlist ``` @@ -83,7 +83,7 @@ mat Illustration of the multiple NetEmd comparisons based on subgraph counts. ```{r,netemdorbitsPLOT,fig.align='center',fig.dim=c(8,8)} -netemd.plot(netemdlist=netemdlist,clustering_method="ward.D",main="NetEmd subgraph counts") +netemd.plot(netemdlist = netemdlist, clustering_method = "ward.D", main = "NetEmd subgraph counts") ``` ## NetEmd using the Laplacian and Normalized Laplacian Spectrum @@ -91,20 +91,20 @@ netemd.plot(netemdlist=netemdlist,clustering_method="ward.D",main="NetEmd subgra Pre-compute the Laplacian and normalized Laplacian for each graph considered: ```{r, netemdspectrum} # NetEMD using the Laplacian and normalized Laplacian Spectrum. -SPECT<-list() +SPECT <- list() -#This step may take several minutes. -for(i in 1:length(glist)){ - Lapg <- igraph::laplacian_matrix(graph = glist[[i]],normalized = FALSE,sparse = FALSE) - NLap <- igraph::laplacian_matrix(graph = glist[[i]],normalized = TRUE,sparse = FALSE) - SPECT[[ names(glist)[i] ]] <- cbind(L.Spectra= eigen(Lapg)$values, NL.Spectra= eigen(NLap)$values) +# This step may take several minutes. 
+for (i in 1:length(glist)) { + Lapg <- igraph::laplacian_matrix(graph = glist[[i]], normalized = FALSE, sparse = FALSE) + NLap <- igraph::laplacian_matrix(graph = glist[[i]], normalized = TRUE, sparse = FALSE) + SPECT[[names(glist)[i]]] <- cbind(L.Spectra = eigen(Lapg)$values, NL.Spectra = eigen(NLap)$values) } str(SPECT) ``` Compute NetEmd: ```{r} -netemdlist <- netemd_many_to_many(dhists = SPECT,smoothing_window_width = 0) #Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +netemdlist <- netemd_many_to_many(dhists = SPECT, smoothing_window_width = 0) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. netemdlist ``` @@ -112,7 +112,7 @@ netemdlist ### Illustration of the multiple NetEmd comparisons based on the Laplacian and Normalized Laplacian spectra ```{r,netemdspectrumPLOT ,fig.align='center',fig.dim=c(8,8)} -netemd.plot(netemdlist=netemdlist,clustering_method="ward.D",main="NetEmd Spectra") +netemd.plot(netemdlist = netemdlist, clustering_method = "ward.D", main = "NetEmd Spectra") ``` @@ -135,26 +135,26 @@ Generation of regular grid, ring and tree-like networks with 400 nodes and 1600 ```{r, netdisgoldstandnetworks,fig.align='center',fig.dim=c(8,4)} # Create networks set.seed(3171) -gLat_1 <- igraph::graph.lattice(c(20,20)) -gLat_2 <- igraph::graph.lattice(c(40,40)) -gRing_1 <- igraph::make_ring(20^2) +gLat_1 <- igraph::graph.lattice(c(20, 20)) +gLat_2 <- igraph::graph.lattice(c(40, 40)) +gRing_1 <- igraph::make_ring(20^2) gRing_2 <- igraph::make_ring(40^2) -gTree_1 <- igraph::as.undirected( make_tree(n = 20^2,children = 3) ) -gTree_2 <- igraph::as.undirected( make_tree(n = 40^2,children = 3) ) +gTree_1 <- igraph::as.undirected(make_tree(n = 20^2, children = 3)) +gTree_2 <- 
igraph::as.undirected(make_tree(n = 40^2, children = 3)) # Create a random graph to be used as a gold-standard -gst_1 <- igraph::as.undirected( graph.star(20^2) ) -gst_2 <- igraph::as.undirected( graph.star(40^2) ) +gst_1 <- igraph::as.undirected(graph.star(20^2)) +gst_2 <- igraph::as.undirected(graph.star(40^2)) -par(mfrow=c(1,2)) -plot(gst_1,vertex.size=0.8,vertex.label=NA) -plot(gst_2,vertex.size=0.8,vertex.label=NA) +par(mfrow = c(1, 2)) +plot(gst_1, vertex.size = 0.8, vertex.label = NA) +plot(gst_2, vertex.size = 0.8, vertex.label = NA) ``` Obtain the comparison via Netdis using each of the reference graph networks. ```{r,netdisgoldstand ,fig.align='center'} -glist <- list(Lat_1=gLat_1, Lat_2=gLat_2, Ring_1=gRing_1, Ring_2=gRing_1, Tree_1=gTree_1, Tree_2=gTree_2) +glist <- list(Lat_1 = gLat_1, Lat_2 = gLat_2, Ring_1 = gRing_1, Ring_2 = gRing_1, Tree_1 = gTree_1, Tree_2 = gTree_2) # Netdis using the goldstd_1 graph as gold-standard reference point netdis_mat_gst1 <- netdis_many_to_many(graphs = glist, ref_graph = gst_1) @@ -177,11 +177,11 @@ cross_comp_to_matrix(measure = netdis_mat_gst2$netdis, cross_comparison_spec = n Heatmap of the Netdis comparisons: ```{r,netdisgoldstandPLOT ,fig.align='center',fig.dim=c(8,8)} -#Network comparisons heatmap with Gold-Standard 1 -netdis.plot(netdislist = netdis_mat_gst1, whatrow = 2,main = "Netdis GoldStd-1") +# Network comparisons heatmap with Gold-Standard 1 +netdis.plot(netdislist = netdis_mat_gst1, whatrow = 2, main = "Netdis GoldStd-1") -#Network comparisons heatmap with Gold-Standard 2 -netdis.plot(netdislist = netdis_mat_gst2, whatrow = 2,main = "Netdis GoldStd-2") +# Network comparisons heatmap with Gold-Standard 2 +netdis.plot(netdislist = netdis_mat_gst2, whatrow = 2, main = "Netdis GoldStd-2") ``` @@ -196,7 +196,7 @@ netdis_mat ``` ```{r,netdisGPPLOT ,fig.align='center',fig.dim=c(8,8)} -netdis.plot(netdislist = netdis_mat, whatrow = 2,main = "Netdis-GP") +netdis.plot(netdislist = netdis_mat, whatrow = 2, main = 
"Netdis-GP") ``` ### Using Netdis with no expectation ($E_w=0$) @@ -210,7 +210,7 @@ netdis_mat ``` ```{r,netdiszeroPLOT ,fig.align='center',fig.dim=c(8,8)} -netdis.plot(netdislist = netdis_mat, whatrow = 2,main = "Netdis-zero") +netdis.plot(netdislist = netdis_mat, whatrow = 2, main = "Netdis-zero") ``` ------------------------- diff --git a/vignettes/NetEmdTimeOrdering.Rmd b/vignettes/NetEmdTimeOrdering.Rmd index 01c91197..2a199a41 100644 --- a/vignettes/NetEmdTimeOrdering.Rmd +++ b/vignettes/NetEmdTimeOrdering.Rmd @@ -28,17 +28,17 @@ For other vignettes in this package see the ["Menu"](V-Menu.html). The package contains the world trade networks and pre-computed subgraph/graphlet counts in the R data object ` worldtradesub`. This object contains a list of two lists. The first list is `worldtradesub$wtnets` which contains a small sample of the trade networks (2001-2014) and the second `worldtradesub$Counts` which contains pre-computed counts for a larger set of trade networks going from 1985 to 2014. ```{r, message=FALSE} - library("netdist") - library("igraph") - data(worldtradesub) - summary(worldtradesub) - wtnets<- worldtradesub$wtnets - summary(wtnets) +library("netdist") +library("igraph") +data(worldtradesub) +summary(worldtradesub) +wtnets <- worldtradesub$wtnets +summary(wtnets) ``` These world trade networks are denser than typically sparse social networks. For example, the edge density for the network in 2001 is `r igraph::graph.density(worldtradesub$wtnets$wtn2001)`. Here is a plot of this network highlighting the relatively large number of edges: ```{r,fig.align='center',fig.dim=c(5,5)} - plot(wtnets$wtn2001,vertex.size=5,vertex.label.cex=0.4) +plot(wtnets$wtn2001, vertex.size = 5, vertex.label.cex = 0.4) ``` @@ -47,11 +47,11 @@ These world trade networks are denser than typically sparse social networks. For In this example **NetEmd** will consider orbit counts of subgraphs containing up to 5 nodes. 
If NetEmd is to be called a single time, then the command `netemd_many_to_many(graphs = wtnets)` would suffice. The following code provides such an example: ```{r} - # As the trade networks are considerable dense, this example first considers a small number of networks. - #This example may take some minutes to run. - netemd_result <- netemd_many_to_many(graphs = wtnets[1:4],mc.cores = 1) +# As the trade networks are considerable dense, this example first considers a small number of networks. +# This example may take some minutes to run. +netemd_result <- netemd_many_to_many(graphs = wtnets[1:4], mc.cores = 1) - print(netemd_result) +print(netemd_result) ``` However, if there are pre-computed counts or features NetEmd can be called via these features instead. @@ -61,26 +61,26 @@ World trade networks consist of relatively dense networks, thus leading to longe ```{r} # This example may take more than a few minutes to run (approx. 20 mins) , and it is not necessary to run it for the upcoming examples as a larger set of counts has been already computed. -if(FALSE){# It is not necessary to run, as these counts are already available in. - Counts <- list() - for(i in 1:length(wtnets)){ - Counts[[ names(wtnets)[i] ]] <- count_orbits_per_node(graph = wtnets[[i]],max_graphlet_size = 5) - } +if (FALSE) { # It is not necessary to run, as these counts are already available in. + Counts <- list() + for (i in 1:length(wtnets)) { + Counts[[names(wtnets)[i]]] <- count_orbits_per_node(graph = wtnets[[i]], max_graphlet_size = 5) + } } ``` Now, with pre-computed counts NetEmd can be calculated more rapidly as the computations of the counts are often the bottle neck in the computational time of NetEmd. 
NetEmd will be called `r length(worldtradesub$Counts) * (length(worldtradesub$Counts) - 1)/2 ` times in order to obtain all pairwise comparisons between the trade networks from 1985 to 2014 (networks with pre-computed subgraph counts): ```{r} - # The pre-computed counts already in the package - Counts <- worldtradesub$Counts - - #Calling NetEmd - netemd_result <- netemd_many_to_many(dhists = Counts ,mc.cores = 1) - - #Results - netemd_matrix <- cross_comp_to_matrix(measure = netemd_result$netemds, cross_comparison_spec = netemd_result$comp_spec) - - print(netemd_matrix[1:10,1:5]) +# The pre-computed counts already in the package +Counts <- worldtradesub$Counts + +# Calling NetEmd +netemd_result <- netemd_many_to_many(dhists = Counts, mc.cores = 1) + +# Results +netemd_matrix <- cross_comp_to_matrix(measure = netemd_result$netemds, cross_comparison_spec = netemd_result$comp_spec) + +print(netemd_matrix[1:10, 1:5]) ``` # Evidence of change in world trade @@ -88,7 +88,7 @@ Now, with pre-computed counts NetEmd can be calculated more rapidly as the compu Based on the comparison of the world trade networks across the years, we can identify periods of time where possible considerable changes in world trade have occurred. The following heat map clearly shows the existence of two potential changes in the world trade system, and which correspond to 1995-1996 and 2010-2011. 
```{r,fig.align='center',fig.dim=c(8.5,8.5)} - netemd.plot(netemdlist=netemd_result,clustering_method="ward.D",main="NetEmd",docluster = FALSE) +netemd.plot(netemdlist = netemd_result, clustering_method = "ward.D", main = "NetEmd", docluster = FALSE) ``` The World Trade Organization (WTO) said the following about these years: diff --git a/vignettes/NetdisGPStepByStep.Rmd b/vignettes/NetdisGPStepByStep.Rmd index f7ee738f..988efe97 100644 --- a/vignettes/NetdisGPStepByStep.Rmd +++ b/vignettes/NetdisGPStepByStep.Rmd @@ -13,10 +13,10 @@ chunk_output_type: console --- ```{r, include = FALSE} - knitr::opts_chunk$set( +knitr::opts_chunk$set( collapse = TRUE, comment = "#>" - ) +) ``` # Introduction @@ -92,11 +92,11 @@ Generation of tree-like networks with 400 nodes and 1600 nodes. ```{r, netwokrs,fig.align='center',fig.dim=c(6,6)} # Create networks set.seed(34) -gTree_1 <- igraph::as.undirected( make_tree(n = 20^2,children = 3) ) -gTree_2 <- igraph::as.undirected( make_tree(n = 40^2,children = 3) ) +gTree_1 <- igraph::as.undirected(make_tree(n = 20^2, children = 3)) +gTree_2 <- igraph::as.undirected(make_tree(n = 40^2, children = 3)) -plot(gTree_1,vertex.size=0.8,vertex.label=NA) -plot(gTree_2,vertex.size=0.8,vertex.label=NA) +plot(gTree_1, vertex.size = 0.8, vertex.label = NA) +plot(gTree_2, vertex.size = 0.8, vertex.label = NA) ``` @@ -126,17 +126,19 @@ num_bins <- 100 One of the first steps in Netdis is the extraction of all ego-networks in each of the query networks: ```{r} # Get ego-networks for query graphs -ego_1 <- make_named_ego_graph(gTree_1, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) - -ego_2 <- make_named_ego_graph(gTree_2, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) -head(ego_1,n=2) -head(ego_2,n=2) +ego_1 <- make_named_ego_graph(gTree_1, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) + +ego_2 <- 
make_named_ego_graph(gTree_2, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) +head(ego_1, n = 2) +head(ego_2, n = 2) ``` ## Count the number of nodes and the subgraphs in the ego-networks of each graph ($N_w$) @@ -159,13 +161,17 @@ densities_1 <- ego_network_density(graphlet_counts = subgraph_counts_1) densities_2 <- ego_network_density(graphlet_counts = subgraph_counts_2) # Adaptively bin ego-network densities -binned_densities_1 <- binned_densities_adaptive(densities = densities_1, - min_counts_per_interval = min_bin_count, - num_intervals = num_bins) - -binned_densities_2 <- binned_densities_adaptive(densities = densities_2, - min_counts_per_interval = min_bin_count, - num_intervals = num_bins) +binned_densities_1 <- binned_densities_adaptive( + densities = densities_1, + min_counts_per_interval = min_bin_count, + num_intervals = num_bins +) + +binned_densities_2 <- binned_densities_adaptive( + densities = densities_2, + min_counts_per_interval = min_bin_count, + num_intervals = num_bins +) str(binned_densities_1) str(binned_densities_2) @@ -174,13 +180,17 @@ str(binned_densities_2) ## Calculate expected subgraph counts in each density bin by the Geometric-Poisson approximation ($E_w$) With the ego-network binning obtained, the Geometric-Poisson approximation of the expected subgraph counts, $E_w$, can be obtained for each subgraph $w$ and each density bin: ```{r} -binned_gp_subgraph_counts_1 <- density_binned_counts_gp(graphlet_counts = subgraph_counts_1, - density_interval_indexes = binned_densities_1$interval_indexes, - max_graphlet_size = max_subgraph_size) - -binned_gp_subgraph_counts_2 <- density_binned_counts_gp(graphlet_counts = subgraph_counts_2, - density_interval_indexes = binned_densities_2$interval_indexes, - max_graphlet_size = max_subgraph_size) +binned_gp_subgraph_counts_1 <- density_binned_counts_gp( + graphlet_counts = subgraph_counts_1, + density_interval_indexes = 
binned_densities_1$interval_indexes, + max_graphlet_size = max_subgraph_size +) + +binned_gp_subgraph_counts_2 <- density_binned_counts_gp( + graphlet_counts = subgraph_counts_2, + density_interval_indexes = binned_densities_2$interval_indexes, + max_graphlet_size = max_subgraph_size +) binned_gp_subgraph_counts_1 binned_gp_subgraph_counts_2 ``` @@ -189,27 +199,35 @@ binned_gp_subgraph_counts_2 With $E_w$ now obtained, Netdis-GP, can be compute as per its construction by first centring the observed counts: ```{r} # Calculate expected subgraph counts for each ego network -exp_gp_subgraph_counts_1 <- netdis_expected_counts(graphlet_counts = subgraph_counts_1, - density_breaks = binned_densities_1$breaks, - density_binned_reference_counts = binned_gp_subgraph_counts_1, - max_graphlet_size = max_subgraph_size, - scale_fn=NULL) - - -exp_gp_subgraph_counts_2 <- netdis_expected_counts(graphlet_counts = subgraph_counts_2, - density_breaks = binned_densities_2$breaks, - density_binned_reference_counts = binned_gp_subgraph_counts_2, - max_graphlet_size = max_subgraph_size, - scale_fn=NULL) +exp_gp_subgraph_counts_1 <- netdis_expected_counts( + graphlet_counts = subgraph_counts_1, + density_breaks = binned_densities_1$breaks, + density_binned_reference_counts = binned_gp_subgraph_counts_1, + max_graphlet_size = max_subgraph_size, + scale_fn = NULL +) + + +exp_gp_subgraph_counts_2 <- netdis_expected_counts( + graphlet_counts = subgraph_counts_2, + density_breaks = binned_densities_2$breaks, + density_binned_reference_counts = binned_gp_subgraph_counts_2, + max_graphlet_size = max_subgraph_size, + scale_fn = NULL +) # Centre subgraph counts by subtracting expected counts -centred_subgraph_counts_1 <- netdis_subtract_exp_counts(graphlet_counts = subgraph_counts_1, - exp_graphlet_counts = exp_gp_subgraph_counts_1, - max_graphlet_size = max_subgraph_size) - -centred_subgraph_counts_2 <- netdis_subtract_exp_counts(graphlet_counts = subgraph_counts_2, - exp_graphlet_counts = 
exp_gp_subgraph_counts_2, - max_graphlet_size = max_subgraph_size) +centred_subgraph_counts_1 <- netdis_subtract_exp_counts( + graphlet_counts = subgraph_counts_1, + exp_graphlet_counts = exp_gp_subgraph_counts_1, + max_graphlet_size = max_subgraph_size +) + +centred_subgraph_counts_2 <- netdis_subtract_exp_counts( + graphlet_counts = subgraph_counts_2, + exp_graphlet_counts = exp_gp_subgraph_counts_2, + max_graphlet_size = max_subgraph_size +) head(centred_subgraph_counts_1) head(centred_subgraph_counts_2) @@ -229,9 +247,11 @@ sum_subgraph_counts_2 Finally, the total centred counts can be used to obtain the Netdis statistic: ```{r} -netdis_result <- netdis_uptok(centred_graphlet_count_vector_1 = sum_subgraph_counts_1, - centred_graphlet_count_vector_2 = sum_subgraph_counts_2, - max_graphlet_size = max_subgraph_size) +netdis_result <- netdis_uptok( + centred_graphlet_count_vector_1 = sum_subgraph_counts_1, + centred_graphlet_count_vector_2 = sum_subgraph_counts_2, + max_graphlet_size = max_subgraph_size +) print(netdis_result) ``` diff --git a/vignettes/NetdisStepByStep.Rmd b/vignettes/NetdisStepByStep.Rmd index 9b69845e..7162f142 100644 --- a/vignettes/NetdisStepByStep.Rmd +++ b/vignettes/NetdisStepByStep.Rmd @@ -13,10 +13,10 @@ chunk_output_type: console --- ```{r, include = FALSE} - knitr::opts_chunk$set( +knitr::opts_chunk$set( collapse = TRUE, comment = "#>" - ) +) ``` # Introduction @@ -80,11 +80,11 @@ Generation of tree-like networks with 400 nodes and 1600 nodes. 
```{r, netwokrs,fig.align='center',fig.dim=c(6,6)} # Create networks set.seed(34) -gTree_1 <- igraph::as.undirected( make_tree(n = 20^2,children = 3) ) -gTree_2 <- igraph::as.undirected( make_tree(n = 40^2,children = 3) ) +gTree_1 <- igraph::as.undirected(make_tree(n = 20^2, children = 3)) +gTree_2 <- igraph::as.undirected(make_tree(n = 40^2, children = 3)) -plot(gTree_1,vertex.size=0.8,vertex.label=NA) -plot(gTree_2,vertex.size=0.8,vertex.label=NA) +plot(gTree_1, vertex.size = 0.8, vertex.label = NA) +plot(gTree_2, vertex.size = 0.8, vertex.label = NA) ``` @@ -115,17 +115,19 @@ num_bins <- 100 One of the first steps in Netdis is the extraction of all ego-networks in each of the query networks: ```{r} # Get ego-networks for query graphs -ego_1 <- make_named_ego_graph(gTree_1, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) - -ego_2 <- make_named_ego_graph(gTree_2, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) -tail(ego_1,n=2) -tail(ego_2,n=2) +ego_1 <- make_named_ego_graph(gTree_1, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) + +ego_2 <- make_named_ego_graph(gTree_2, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) +tail(ego_1, n = 2) +tail(ego_2, n = 2) ``` ## Count the number of nodes and the subgraphs in ego-networks of query graphs ($N_w$) @@ -158,8 +160,8 @@ For this case the user must provide the gold-standard network of their choosing. The following considers a tree-like network with `r 30^2` nodes as the gold-standard. 
```{r,fig.align='center',fig.dim=c(6,6)} # Network used as gold-standard -gst_1 <- erdos.renyi.game(n = 30^2,p.or.m = graph.density(graph = gTree_2)) -plot(gst_1,vertex.size=0.8,vertex.label=NA) +gst_1 <- erdos.renyi.game(n = 30^2, p.or.m = graph.density(graph = gTree_2)) +plot(gst_1, vertex.size = 0.8, vertex.label = NA) ``` ### Obtain the gold-standard ego-network counts and their binning according to their edge-density ($\rho(.)$) @@ -167,25 +169,31 @@ plot(gst_1,vertex.size=0.8,vertex.label=NA) To calculate the expected counts, $E_w$, the counts of the ego-networks of the gold-standard network need to be obtained first: ```{r} # Obtain subgraph counts and binning for gold-standard -ego_gst_1 <- make_named_ego_graph(graph = gst_1, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) - -subgraph_counts_gst_1 <- ego_to_graphlet_counts(ego_networks = ego_gst_1, - max_graphlet_size = max_subgraph_size) +ego_gst_1 <- make_named_ego_graph( + graph = gst_1, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) + +subgraph_counts_gst_1 <- ego_to_graphlet_counts( + ego_networks = ego_gst_1, + max_graphlet_size = max_subgraph_size +) head(subgraph_counts_gst_1) ``` Subsequently, these ego-networks are binned according to their edge density: ```{r} -densities_gst_1<- ego_network_density(graphlet_counts = subgraph_counts_gst_1) +densities_gst_1 <- ego_network_density(graphlet_counts = subgraph_counts_gst_1) # Adaptively bin ego-network densities -binned_densities_gst_1 <- binned_densities_adaptive(densities = densities_gst_1, - min_counts_per_interval = min_bin_count, - num_intervals = num_bins) +binned_densities_gst_1 <- binned_densities_adaptive( + densities = densities_gst_1, + min_counts_per_interval = min_bin_count, + num_intervals = num_bins +) str(binned_densities_gst_1) ``` @@ -197,8 +205,10 @@ $E_w$ is estimated based on the average subgraph counts of ego-networks per dens ```{r} 
# Scale ego-network subgraph counts by dividing by the total number of k-tuples in the # ego-network (where k is the subgraph size) -scaled_subgraph_counts_ref <- scale_graphlet_counts_ego(graphlet_counts = subgraph_counts_gst_1, - max_graphlet_size =max_subgraph_size) +scaled_subgraph_counts_ref <- scale_graphlet_counts_ego( + graphlet_counts = subgraph_counts_gst_1, + max_graphlet_size = max_subgraph_size +) str(scaled_subgraph_counts_ref) ``` @@ -215,27 +225,35 @@ ref_binned_canonical_subgraph_counts After obtaining the average scaled subgraph counts per density bin, the subgraph counts of the query networks can be centred: ```{r} # Scale the reference counts of the gold-standard network to the sizes of each of the query ego-networks. -exp_subgraph_counts_1 <- netdis_expected_counts(graphlet_counts = subgraph_counts_1, - density_breaks = binned_densities_gst_1$breaks, - density_binned_reference_counts = ref_binned_canonical_subgraph_counts, - max_graphlet_size = max_subgraph_size, - scale_fn=count_graphlet_tuples) - - -exp_subgraph_counts_2 <- netdis_expected_counts(graphlet_counts = subgraph_counts_2, - density_breaks = binned_densities_gst_1$breaks, - density_binned_reference_counts = ref_binned_canonical_subgraph_counts, - max_graphlet_size = max_subgraph_size, - scale_fn=count_graphlet_tuples) +exp_subgraph_counts_1 <- netdis_expected_counts( + graphlet_counts = subgraph_counts_1, + density_breaks = binned_densities_gst_1$breaks, + density_binned_reference_counts = ref_binned_canonical_subgraph_counts, + max_graphlet_size = max_subgraph_size, + scale_fn = count_graphlet_tuples +) + + +exp_subgraph_counts_2 <- netdis_expected_counts( + graphlet_counts = subgraph_counts_2, + density_breaks = binned_densities_gst_1$breaks, + density_binned_reference_counts = ref_binned_canonical_subgraph_counts, + max_graphlet_size = max_subgraph_size, + scale_fn = count_graphlet_tuples +) # Centre subgraph counts by subtracting expected counts -centred_subgraph_counts_1 <- 
netdis_subtract_exp_counts(graphlet_counts = subgraph_counts_1, - exp_graphlet_counts = exp_subgraph_counts_1, - max_graphlet_size = max_subgraph_size) - -centred_subgraph_counts_2 <- netdis_subtract_exp_counts(graphlet_counts = subgraph_counts_2, - exp_graphlet_counts = exp_subgraph_counts_2, - max_graphlet_size = max_subgraph_size) +centred_subgraph_counts_1 <- netdis_subtract_exp_counts( + graphlet_counts = subgraph_counts_1, + exp_graphlet_counts = exp_subgraph_counts_1, + max_graphlet_size = max_subgraph_size +) + +centred_subgraph_counts_2 <- netdis_subtract_exp_counts( + graphlet_counts = subgraph_counts_2, + exp_graphlet_counts = exp_subgraph_counts_2, + max_graphlet_size = max_subgraph_size +) tail(centred_subgraph_counts_1) tail(centred_subgraph_counts_2) @@ -256,9 +274,11 @@ sum_subgraph_counts_2 Finally, the total centred counts can be used to obtain the Netdis statistic: ```{r} -netdis_result <- netdis_uptok(centred_graphlet_count_vector_1 = sum_subgraph_counts_1, - centred_graphlet_count_vector_2 = sum_subgraph_counts_2, - max_graphlet_size = max_subgraph_size) +netdis_result <- netdis_uptok( + centred_graphlet_count_vector_1 = sum_subgraph_counts_1, + centred_graphlet_count_vector_2 = sum_subgraph_counts_2, + max_graphlet_size = max_subgraph_size +) print(netdis_result) ``` diff --git a/vignettes/NewNetdisCustomisations.Rmd b/vignettes/NewNetdisCustomisations.Rmd index ba6c49b6..e8cbe645 100644 --- a/vignettes/NewNetdisCustomisations.Rmd +++ b/vignettes/NewNetdisCustomisations.Rmd @@ -12,10 +12,10 @@ chunk_output_type: console --- ```{r, include = FALSE} - knitr::opts_chunk$set( +knitr::opts_chunk$set( collapse = TRUE, comment = "#>" - ) +) ``` # Introduction @@ -43,14 +43,14 @@ Generation of regular grid, ring and tree-like networks with 400 nodes and 1600 # Create lattice, Ring and Tree like networks of sizes 20^2 and 40^2. 
# Create networks set.seed(3171) -gLat_1 <- graph.lattice(c(20,20)) -gLat_2 <- graph.lattice(c(40,40)) -gRing_1 <- make_ring(20^2) +gLat_1 <- graph.lattice(c(20, 20)) +gLat_2 <- graph.lattice(c(40, 40)) +gRing_1 <- make_ring(20^2) gRing_2 <- make_ring(40^2) -gTree_1 <- igraph::as.undirected( make_tree(n = 20^2,children = 3) ) -gTree_2 <- igraph::as.undirected( make_tree(n = 40^2,children = 3) ) +gTree_1 <- igraph::as.undirected(make_tree(n = 20^2, children = 3)) +gTree_2 <- igraph::as.undirected(make_tree(n = 40^2, children = 3)) -glist <- list(Lat_1=gLat_1, Lat_2=gLat_2, Ring_1=gRing_1, Ring_2=gRing_1, Tree_1=gTree_1, Tree_2=gTree_2) +glist <- list(Lat_1 = gLat_1, Lat_2 = gLat_2, Ring_1 = gRing_1, Ring_2 = gRing_1, Tree_1 = gTree_1, Tree_2 = gTree_2) # Create a random graph to be used as a gold-standard gst <- igraph::as.undirected(graph.star(1000)) @@ -61,34 +61,38 @@ gst <- igraph::as.undirected(graph.star(1000)) ## Using Netdis with a reference graph as a proxy for $E_w$ For this variant a reference graph or gold-standard graph has to be given in `ref_graph`: ```{r,netdisgoldstand,fig.align='center',fig.dim=c(7,7)} -netdis_mat_gst <- netdis_many_to_many(graphs = glist, - ref_graph = gst - ) -netdis.plot(netdislist = netdis_mat_gst,whatrow = 2, main = "Netdis with reference graph") +netdis_mat_gst <- netdis_many_to_many( + graphs = glist, + ref_graph = gst +) +netdis.plot(netdislist = netdis_mat_gst, whatrow = 2, main = "Netdis with reference graph") ``` ## Using Netdis with a constant valued expectation, $E_w=k$ For this variant please set `ref_graph` to the desired constant $k$. In this example we consider $k=0$ and $k=5$. 
Considering $k=0$ is equivalent to computing Netdis without background expectations: ```{r,netdisconstant,fig.align='center',fig.dim=c(7,7)} -netdis_mat_zero <- netdis_many_to_many(graphs = glist, - ref_graph = 0 - ) -netdis.plot(netdislist = netdis_mat_zero,whatrow = 2, main = "Netdis Ew=0") - -netdis_mat_5 <- netdis_many_to_many(graphs = glist, - ref_graph = 5 - ) -netdis.plot(netdislist = netdis_mat_5,whatrow = 2, main = "Netdis Ew=5") +netdis_mat_zero <- netdis_many_to_many( + graphs = glist, + ref_graph = 0 +) +netdis.plot(netdislist = netdis_mat_zero, whatrow = 2, main = "Netdis Ew=0") + +netdis_mat_5 <- netdis_many_to_many( + graphs = glist, + ref_graph = 5 +) +netdis.plot(netdislist = netdis_mat_5, whatrow = 2, main = "Netdis Ew=5") ``` ## Using Netdis-GP, Geometric-Poisson approximation for $E_w$ In order to obtain the Netdis-GP variant set `ref_graph=NULL` (default). ```{r,fig.align='center',fig.dim=c(7,7)} -netdisgp_mat <- netdis_many_to_many(graphs = glist, - ref_graph = NULL - ) +netdisgp_mat <- netdis_many_to_many( + graphs = glist, + ref_graph = NULL +) netdis.plot(netdisgp_mat, whatrow = 2, main = "Netdis-GP") ``` @@ -103,7 +107,7 @@ mybinning <- function(densities) { min_counts_per_interval <- 5 num_intervals <- 3 # - if( length(densities) < min_counts_per_interval) min_counts_per_interval <- length(densities) + if (length(densities) < min_counts_per_interval) min_counts_per_interval <- length(densities) breaks <- adaptive_breaks(densities, min_count = min_counts_per_interval, breaks = num_intervals @@ -118,7 +122,7 @@ mybinning <- function(densities) { breaks = breaks ) } - + # Let us see an example output of the binning function binning_example <- mybinning(runif(20)) binning_example$breaks @@ -126,12 +130,13 @@ binning_example$interval_indexes binning_example$densities # Calculate Netdis -netdisgp_mat_mybin <- netdis_many_to_many(graphs = glist, - ref_graph = NULL, - binning_fn = mybinning - ) +netdisgp_mat_mybin <- netdis_many_to_many( + graphs 
= glist, + ref_graph = NULL, + binning_fn = mybinning +) -netdis.plot(netdislist = netdisgp_mat_mybin,whatrow = 2, main = "Netdis-GP with mybinning") +netdis.plot(netdislist = netdisgp_mat_mybin, whatrow = 2, main = "Netdis-GP with mybinning") ``` Note that whenever $E_w$ is taken as a constant value, then the binning will not have an effect on the computation of Netdis. @@ -163,42 +168,45 @@ These values can be directly imputed and changed into the shortcut Netdis functi ```{r,fig.align='center',fig.dim=c(7,7)} # (We only recommend changing these default values for those users that have a clear understanding of graph theory behind it) -#(change values with care as not all combinations may be possible). +# (change values with care as not all combinations may be possible). -#Defining a new binning function: +# Defining a new binning function: binning_fn <- purrr::partial(binned_densities_adaptive, - min_counts_per_interval = 10, #10-egos required per bin - num_intervals = 20) #Start binning with 20 bins - -#Changing parameter values in Netdis: -netdisgp_mat_custom <- netdis_many_to_many(graphs = glist, - ref_graph = NULL, - max_graphlet_size = 4, #Subgraphs/graphlets up to size 4 considered. - neighbourhood_size = 3,# 3-step ego-networks - min_ego_nodes = 5, #ego-networks with at least five nodes - min_ego_edges = 4, #ego-networks with at least 4 edges - binning_fn = binning_fn #Providing a custom binning function - ) + min_counts_per_interval = 10, # 10-egos required per bin + num_intervals = 20 +) # Start binning with 20 bins + +# Changing parameter values in Netdis: +netdisgp_mat_custom <- netdis_many_to_many( + graphs = glist, + ref_graph = NULL, + max_graphlet_size = 4, # Subgraphs/graphlets up to size 4 considered. 
+ neighbourhood_size = 3, # 3-step ego-networks + min_ego_nodes = 5, # ego-networks with at least five nodes + min_ego_edges = 4, # ego-networks with at least 4 edges + binning_fn = binning_fn # Providing a custom binning function +) ``` Here the default parameters are used, and a heatmap of the result of Netdis with default parameters and Netdis with the previously modified parameters is given: ```{r ,fig.align='center',fig.dim=c(7,7)} -#Default binning +# Default binning binning_fn <- purrr::partial(binned_densities_adaptive, - min_counts_per_interval = min_bin_count, - num_intervals = num_bins) -#Default computation of Netdis -netdisgp_mat <- netdis_many_to_many(graphs = glist, - ref_graph = NULL, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges, - binning_fn = binning_fn - ) -netdis.plot(netdislist = netdisgp_mat,whatrow = 2, main = "Netdis-GP: Default parameter values") -netdis.plot(netdislist = netdisgp_mat_custom,whatrow = 2, main = "Netdis-GP: illustrative parameter changes") - + min_counts_per_interval = min_bin_count, + num_intervals = num_bins +) +# Default computation of Netdis +netdisgp_mat <- netdis_many_to_many( + graphs = glist, + ref_graph = NULL, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges, + binning_fn = binning_fn +) +netdis.plot(netdislist = netdisgp_mat, whatrow = 2, main = "Netdis-GP: Default parameter values") +netdis.plot(netdislist = netdisgp_mat_custom, whatrow = 2, main = "Netdis-GP: illustrative parameter changes") ``` diff --git a/vignettes/PreComputedProps.Rmd b/vignettes/PreComputedProps.Rmd index 815f7e85..564c9b1c 100644 --- a/vignettes/PreComputedProps.Rmd +++ b/vignettes/PreComputedProps.Rmd @@ -41,35 +41,35 @@ NetEmd and Netdis use subgraph counts, however, NetEmd takes counts directly fro ```{r, 
netemd,fig.align='center',fig.dim=c(8,4)} # Create lattice networks -gLat_1 <- igraph::graph.lattice(c(20,20)) -gLat_2 <- igraph::graph.lattice(c(44,44)) +gLat_1 <- igraph::graph.lattice(c(20, 20)) +gLat_2 <- igraph::graph.lattice(c(44, 44)) -par(mfrow=c(1,2)) -plot(gLat_1,vertex.size=4,vertex.label=NA) -plot(gLat_2,vertex.size=4,vertex.label=NA) +par(mfrow = c(1, 2)) +plot(gLat_1, vertex.size = 4, vertex.label = NA) +plot(gLat_2, vertex.size = 4, vertex.label = NA) ``` The simple computation of NetEmd without pre-computed features: ```{r} -netemd_one_to_one(graph_1=gLat_1,graph_2=gLat_2,feature_type="orbit",smoothing_window_width = 1) +netemd_one_to_one(graph_1 = gLat_1, graph_2 = gLat_2, feature_type = "orbit", smoothing_window_width = 1) ``` ### Providing a matrix of network features ```{r} -counts_1= count_orbits_per_node(graph = gLat_1,max_graphlet_size = 5) -counts_2= count_orbits_per_node(graph = gLat_2,max_graphlet_size = 5) -head(counts_1[,1:4]) +counts_1 <- count_orbits_per_node(graph = gLat_1, max_graphlet_size = 5) +counts_2 <- count_orbits_per_node(graph = gLat_2, max_graphlet_size = 5) +head(counts_1[, 1:4]) -netemd_one_to_one(dhists_1=counts_1, dhists_2=counts_2,smoothing_window_width = 1) +netemd_one_to_one(dhists_1 = counts_1, dhists_2 = counts_2, smoothing_window_width = 1) ``` ### Providing the network features as lists of dhist objects ```{r} -dhists_1<- graph_features_to_histograms(features_matrix = counts_1) -dhists_2<- graph_features_to_histograms(features_matrix = counts_2) +dhists_1 <- graph_features_to_histograms(features_matrix = counts_1) +dhists_2 <- graph_features_to_histograms(features_matrix = counts_2) -netemd_one_to_one(dhists_1=dhists_1, dhists_2=dhists_2,smoothing_window_width = 1) +netemd_one_to_one(dhists_1 = dhists_1, dhists_2 = dhists_2, smoothing_window_width = 1) ``` ___ @@ -78,35 +78,35 @@ ___ Computation of the Laplacian and Normalized Laplacian: ```{r, netemdEigen} # Networks -gLat_1 <- graph.lattice(c(20,20)) -gLat_2 <- 
graph.lattice(c(44,44)) +gLat_1 <- graph.lattice(c(20, 20)) +gLat_2 <- graph.lattice(c(44, 44)) -#Laplacian -Lapg_1 <- igraph::laplacian_matrix(graph = gLat_1,normalized = FALSE,sparse = FALSE) -Lapg_2 <- igraph::laplacian_matrix(graph = gLat_2,normalized = FALSE,sparse = FALSE) +# Laplacian +Lapg_1 <- igraph::laplacian_matrix(graph = gLat_1, normalized = FALSE, sparse = FALSE) +Lapg_2 <- igraph::laplacian_matrix(graph = gLat_2, normalized = FALSE, sparse = FALSE) -#Normalized Laplacian -NLapg_1 <- igraph::laplacian_matrix(graph = gLat_1,normalized = TRUE,sparse = FALSE) -NLapg_2 <- igraph::laplacian_matrix(graph = gLat_2,normalized = TRUE,sparse = FALSE) +# Normalized Laplacian +NLapg_1 <- igraph::laplacian_matrix(graph = gLat_1, normalized = TRUE, sparse = FALSE) +NLapg_2 <- igraph::laplacian_matrix(graph = gLat_2, normalized = TRUE, sparse = FALSE) # Providing a matrix of network features (e.g. Spectra). (This may take a couple of minutes). -spec_1 <- cbind(L.Spectra= eigen(Lapg_1)$values, NL.Spectra= eigen(NLapg_1)$values) -spec_2 <- cbind(L.Spectra= eigen(Lapg_2)$values, NL.Spectra= eigen(NLapg_2)$values) +spec_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) +spec_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) head(spec_1) ``` Similarly to counts, all other features can be given as a matrix or as dhist objects: ```{r} -netemd_one_to_one(dhists_1 = spec_1,dhists_2 = spec_2, smoothing_window_width = 0) +netemd_one_to_one(dhists_1 = spec_1, dhists_2 = spec_2, smoothing_window_width = 0) # Providing pre-computed dhist objects from network features -dhists_1<- graph_features_to_histograms(spec_1) -dhists_2<- graph_features_to_histograms(spec_2) +dhists_1 <- graph_features_to_histograms(spec_1) +dhists_2 <- graph_features_to_histograms(spec_2) -netemd_one_to_one(dhists_1=dhists_1, dhists_2=dhists_2, smoothing_window_width = 0) +netemd_one_to_one(dhists_1 = dhists_1, dhists_2 = dhists_2, 
smoothing_window_width = 0) ``` ------------------------- @@ -124,15 +124,15 @@ The selection of a gold-standard graph as a substitute for $E_w$ could be done w source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects -graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"),format = "ncol") -graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"),format = "ncol") +graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") +graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") ``` For illustration purposes consider the lattice networks as possible gold-standard networks: ```{r,fig.align='center'} # Lattice graphs to be used as gold-standard as a reference point comparison -goldstd_1 <- igraph::graph.lattice(c(20,20)) #Graph with 8^2 nodes -goldstd_2 <- igraph::graph.lattice(c(44,44)) #Graph with 44^2 nodes +goldstd_1 <- igraph::graph.lattice(c(20, 20)) # Graph with 8^2 nodes +goldstd_2 <- igraph::graph.lattice(c(44, 44)) # Graph with 44^2 nodes ``` Now obtain the subgraph counts for all networks. @@ -146,17 +146,17 @@ props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) Compute Netdis using the pre-computed counts and any of the example gold-standard networks. ```{r} -netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, graphlet_counts_ref = props_goldstd_1) -netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, graphlet_counts_ref = props_goldstd_2) +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_1) +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_2) ``` Comparison to the result of Netdis without pre-computed counts. 
```{r} # Netdis using the goldstd_1 graph as gold-standard reference point -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_1) +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_1) # Netdis using the goldstd_2 graph as gold-standard reference point -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_2) +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_2) ``` @@ -169,17 +169,17 @@ This Netdis variant focuses on detecting more meso-level discrepancies between t source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects -graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"),format = "ncol") -graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"),format = "ncol") +graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") +graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") -# Netdis using the Geometric-Poisson approximation as a way to obtain background expectations. -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = NULL) +# Netdis using the Geometric-Poisson approximation as a way to obtain background expectations. +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) # Providing pre-calculated subgraph counts. 
props_1 <- count_graphlets_ego(graph = graph_1) props_2 <- count_graphlets_ego(graph = graph_2) -netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, ref_graph = NULL) +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = NULL) ``` @@ -192,17 +192,17 @@ Comparing the networks via their observed ego counts without centring them, (equ source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects -graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"),format = "ncol") -graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"),format = "ncol") +graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") +graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") -#Netdis using no expectations (or equivalently, expectation equal to zero). -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = 0) +# Netdis using no expectations (or equivalently, expectation equal to zero). +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = 0) # Providing pre-calculated subgraph counts. 
props_1 <- count_graphlets_ego(graph = graph_1) props_2 <- count_graphlets_ego(graph = graph_2) -netdis_one_to_one(graphlet_counts_1= props_1,graphlet_counts_2= props_2, ref_graph = 0) +netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = 0) ``` ------------------------- diff --git a/vignettes/default_pairwise_usage.Rmd b/vignettes/default_pairwise_usage.Rmd index 74db95f1..6041a04d 100644 --- a/vignettes/default_pairwise_usage.Rmd +++ b/vignettes/default_pairwise_usage.Rmd @@ -11,8 +11,8 @@ vignette: > ```{r, include = FALSE} knitr::opts_chunk$set( -collapse = TRUE, -comment = "#>" + collapse = TRUE, + comment = "#>" ) ``` @@ -41,25 +41,29 @@ Although the `virusppi` list of PPI networks is loaded along with the `netdist` source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as undirected igraph objects, with no loops, multiple edges or degree zero nodes. -graph_1 <- read_simple_graph(file = file.path(source_dir, "EBV.txt"), - format = "ncol") +graph_1 <- read_simple_graph( + file = file.path(source_dir, "EBV.txt"), + format = "ncol" +) -graph_2 <- read_simple_graph(file = file.path(source_dir, "ECL.txt"), - format = "ncol") +graph_2 <- read_simple_graph( + file = file.path(source_dir, "ECL.txt"), + format = "ncol" +) # Herpes virus EBV protein-protein interaction graph with 60 nodes and 208 edges. graph_1 -#Note this graph is the same as +# Note this graph is the same as # virusppi$EBV # Herpes virus ECL protein-protein interaction graph with 1941 nodes and 3989 edges. graph_2 -#Note this graph is the same as +# Note this graph is the same as # virusppi$ECL -#A simple visualization of the graphs. -plot(graph_1,vertex.size=4,vertex.label=NA) -plot(graph_2,vertex.size=4,vertex.label=NA) +# A simple visualization of the graphs. 
+plot(graph_1, vertex.size = 4, vertex.label = NA) +plot(graph_2, vertex.size = 4, vertex.label = NA) ``` Other networks loaded in this package are discussed in ["NetEmd: World trade networks"](NetEmdTimeOrdering.html). You can also see `?virusppi` and `?worldtradesub`. @@ -97,34 +101,36 @@ source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects # Herpes virus EBV protein-protein interaction graph with 60 nodes and 208 edges. graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), - format = "ncol") + format = "ncol" +) # Herpes virus ECL protein-protein interaction graph with 1941 nodes and 3989 edges. graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), - format = "ncol") + format = "ncol" +) # One to one NetEmd comparison. -netemd_one_to_one(graph_1=graph_1,graph_2=graph_2,feature_type="orbit",smoothing_window_width = 1)#Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +netemd_one_to_one(graph_1 = graph_1, graph_2 = graph_2, feature_type = "orbit", smoothing_window_width = 1) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. 
``` ## Comparing two graphs with NetEmd via their Laplacian spectrum ```{r, netemdEigen,fig.align='center'} -#Laplacian -Lapg_1 <- igraph::laplacian_matrix(graph = graph_1,normalized = FALSE,sparse = FALSE) -Lapg_2 <- igraph::laplacian_matrix(graph = graph_2,normalized = FALSE,sparse = FALSE) +# Laplacian +Lapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = FALSE, sparse = FALSE) +Lapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = FALSE, sparse = FALSE) -#Normalized Laplacian -NLapg_1 <- igraph::laplacian_matrix(graph = graph_1,normalized = TRUE,sparse = FALSE) -NLapg_2 <- igraph::laplacian_matrix(graph = graph_2,normalized = TRUE,sparse = FALSE) +# Normalized Laplacian +NLapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = TRUE, sparse = FALSE) +NLapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = TRUE, sparse = FALSE) -#Spectra (this may take a couple of minutes). -props_1 <- cbind(L.Spectra= eigen(Lapg_1)$values, NL.Spectra= eigen(NLapg_1)$values) -props_2 <- cbind(L.Spectra= eigen(Lapg_2)$values, NL.Spectra= eigen(NLapg_2)$values) +# Spectra (this may take a couple of minutes). +props_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) +props_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) -head(props_1,n=3) -head(props_2,n=3) +head(props_1, n = 3) +head(props_2, n = 3) -netemd_one_to_one(dhists_1 = props_1,dhists_2 = props_2,smoothing_window_width = 0)#If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +netemd_one_to_one(dhists_1 = props_1, dhists_2 = props_2, smoothing_window_width = 0) # If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. 
``` ------------------------- @@ -168,18 +174,18 @@ The selection of a gold-standard graph as a substitute for $E_w$ could be done w ```{r,netdisgoldstand,fig.align='center',fig.dim=c(6,6)} # Lattice graphs to be used as a gold-standard reference point -goldstd_1 <- igraph::graph.lattice(c(8,8)) #Graph with 8^2 nodes -goldstd_2 <- igraph::graph.lattice(c(44,44)) #Graph with 44^2 nodes +goldstd_1 <- igraph::graph.lattice(c(8, 8)) # Graph with 8^2 nodes +goldstd_2 <- igraph::graph.lattice(c(44, 44)) # Graph with 44^2 nodes -plot(goldstd_1,vertex.size=4,vertex.label=NA) -plot(goldstd_2,vertex.size=4,vertex.label=NA) +plot(goldstd_1, vertex.size = 4, vertex.label = NA) +plot(goldstd_2, vertex.size = 4, vertex.label = NA) # Netdis using the goldstd_1 graph as gold-standard reference point -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_1) +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_1) # Netdis using the goldstd_2 graph as gold-standard reference point -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = goldstd_2) +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_2) ``` ## Netdis-GP: Using a Geometric-Poisson approximation @@ -201,8 +207,8 @@ where $l$ is the number of connected subgraphs of size $k$, for example, $l=6$ f This variant focuses on detecting more meso-level discrepancies between the ego-network structures. ```{r, netdisGP} -#Netdis using the Geometric-Poisson approximation as a way to obtain background expectations. -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = NULL) +# Netdis using the Geometric-Poisson approximation as a way to obtain background expectations. 
+netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) ``` @@ -210,9 +216,8 @@ netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = NULL) Comparing the networks via their observed ego counts without centring them, (equivalent to using expectation equal to zero). This variant thus focus on detecting small discrepancies between the networks. ```{r,netdiszero} -#Netdis using no expectations (or equivalently, expectation equal to zero). -netdis_one_to_one(graph_1= graph_1, graph_2= graph_2, ref_graph = 0) - +# Netdis using no expectations (or equivalently, expectation equal to zero). +netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = 0) ``` ------------------------- diff --git a/vignettes/dendrogram_example_net_dis.Rmd b/vignettes/dendrogram_example_net_dis.Rmd index 5c0a3445..1090a4ab 100644 --- a/vignettes/dendrogram_example_net_dis.Rmd +++ b/vignettes/dendrogram_example_net_dis.Rmd @@ -33,31 +33,35 @@ supported values for the `format` parameter. ```{r, message=FALSE} library("netdist") library("igraph") -edge_format = "ncol" +edge_format <- "ncol" # Load reference graph (used for Netdis. 
Not required for NetEmd -ref_path = file.path(system.file(file.path("extdata", "random"), package = "netdist"), - "ER_1250_10_1") +ref_path <- file.path( + system.file(file.path("extdata", "random"), package = "netdist"), + "ER_1250_10_1" +) ref_graph <- read_simple_graph(ref_path, format = edge_format) # Set source directory and file properties for Virus PPI graph edge files source_dir <- system.file(file.path("extdata", "VRPINS"), - package = "netdist") + package = "netdist" +) edge_format <- "ncol" file_pattern <- "*" # Load all graphs in the source folder matching the filename pattern query_graphs <- read_simple_graphs(source_dir, - format = edge_format, - pattern = file_pattern) + format = edge_format, + pattern = file_pattern +) print(names(query_graphs)) ``` ```{r,fig.align='center',fig.dim=c(5,5)} -plot(query_graphs$EBV,vertex.label=NA,vertex.size=8) -plot(query_graphs$`HSV-1`,vertex.label=NA,vertex.size=8) -plot(query_graphs$KSHV,vertex.label=NA,vertex.size=8) -plot(query_graphs$VZV,vertex.label=NA,vertex.size=8) -plot(query_graphs$ECL,vertex.label=NA,vertex.size=4) +plot(query_graphs$EBV, vertex.label = NA, vertex.size = 8) +plot(query_graphs$`HSV-1`, vertex.label = NA, vertex.size = 8) +plot(query_graphs$KSHV, vertex.label = NA, vertex.size = 8) +plot(query_graphs$VZV, vertex.label = NA, vertex.size = 8) +plot(query_graphs$ECL, vertex.label = NA, vertex.size = 4) ``` # Generate Netdis measures between each pair of query graphs @@ -74,10 +78,12 @@ neighbourhood_size <- 2 ## Netdis using an ER reference graph ```{r} # Calculate netdis measure for subgraphs up to size max_subgraph_size -netdis_result <- netdis_many_to_many(graphs = query_graphs, - ref_graph = ref_graph, - max_graphlet_size = max_subgraph_size, - neighbourhood_size = neighbourhood_size) +netdis_result <- netdis_many_to_many( + graphs = query_graphs, + ref_graph = ref_graph, + max_graphlet_size = max_subgraph_size, + neighbourhood_size = neighbourhood_size +) # Netdis measure for subgraphs of 
size 3 res3 <- netdis_result$netdis["netdis3", ] @@ -105,31 +111,33 @@ cex <- 1 # Dendrogram based on Netdis measure for subgraphs of size 3 title <- paste("Netdis: subgraph size = ", 3, sep = "") plot(phangorn::upgma(as.dist(netdis3_mat), method = "average"), - use.edge.length = FALSE, - edge.width = cex*2, - main = title, - cex.lab = cex, cex.axis = cex, - cex.main = cex, cex.sub = cex, - cex = cex) + use.edge.length = FALSE, + edge.width = cex * 2, + main = title, + cex.lab = cex, cex.axis = cex, + cex.main = cex, cex.sub = cex, + cex = cex +) # Dendrogram based on Netdis measure for subgraphs of size 4 -title = paste("Netdis: subgraph size = ", 4, sep = "") +title <- paste("Netdis: subgraph size = ", 4, sep = "") plot(phangorn::upgma(as.dist(netdis4_mat), method = "average"), - use.edge.length = FALSE, - edge.width = cex*2, - main = title, - cex.lab = cex, cex.axis = cex, - cex.main = cex, cex.sub = cex, - cex = cex) + use.edge.length = FALSE, + edge.width = cex * 2, + main = title, + cex.lab = cex, cex.axis = cex, + cex.main = cex, cex.sub = cex, + cex = cex +) ``` diff --git a/vignettes/dendrogram_example_net_emd.Rmd b/vignettes/dendrogram_example_net_emd.Rmd index 4a70fb8d..e0601588 100644 --- a/vignettes/dendrogram_example_net_emd.Rmd +++ b/vignettes/dendrogram_example_net_emd.Rmd @@ -13,41 +13,42 @@ vignette: > library("netdist") # Set source directory and file properties for Virus PPI graph edge files source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") -edge_format = "ncol" -file_pattern = ".txt" +edge_format <- "ncol" +file_pattern <- ".txt" -# Calculate graphlet-based degree distributions for all orbits in graphlets -# comprising up to 4 nodes for all graphs. This only needs to be done once -# per graph (feature_type = "orbit", max_graphlet_size = 4).. +# Calculate graphlet-based degree distributions for all orbits in graphlets +# comprising up to 4 nodes for all graphs. 
This only needs to be done once +# per graph (feature_type = "orbit", max_graphlet_size = 4).. # If feature_type is set to "feature_type", orbit counts for orbits in the # same graphlet will be summed to generate graphlet counts -# If max_graphlet_size is set to 5, graphlet-based degree distributions will +# If max_graphlet_size is set to 5, graphlet-based degree distributions will # be calculated for graphlets comprising up to 5 nodes. virus_gdds <- gdd_for_all_graphs( - source_dir = source_dir, format = edge_format, pattern = file_pattern, - feature_type = "orbit", max_graphlet_size = 4) + source_dir = source_dir, format = edge_format, pattern = file_pattern, + feature_type = "orbit", max_graphlet_size = 4 +) names(virus_gdds) -# Compute NetEMDs between all virus PPI graphs based on the computed graphlet- +# Compute NetEMDs between all virus PPI graphs based on the computed graphlet- # based degree distributions using the default fast "optimise" method and no # smoothing (default). The "optimise" method uses the built-in R optimise # function to efficiently find the offset with the minimum EMD, but is not # guaranteed to find the global minimum if EMD as a function of offset -# is non-convex and/or multimodal. The smoothing window width determines +# is non-convex and/or multimodal. The smoothing window width determines # whether to calculate the NetEMD from the unaltered discrete GDD histograms -# (smoothing_window_width = 0; default) or to first apply "nearest neighbour" -# smoothing by "smearing" the discrete GDD histogram point masses across bins +# (smoothing_window_width = 0; default) or to first apply "nearest neighbour" +# smoothing by "smearing" the discrete GDD histogram point masses across bins # of unit width (smoothing_window_width = 1). 
Returns a named list containing: -# (i) the NetEMDs and (ii) a table containing the graph names and indices +# (i) the NetEMDs and (ii) a table containing the graph names and indices # within the input GDD list for each pair of graphs compared. -res <- netemd_many_to_many(dhists= virus_gdds, smoothing_window_width = 0) +res <- netemd_many_to_many(dhists = virus_gdds, smoothing_window_width = 0) -# You can also specify method = "fixed_step" to use the much slower method of -# exhaustively evaluating the EMD at all offsets separated by a fixed step. -# The default step size is 1/2 the the minimum spacing between locations in -# either histogram after normalising to unit variance. However, you can +# You can also specify method = "fixed_step" to use the much slower method of +# exhaustively evaluating the EMD at all offsets separated by a fixed step. +# The default step size is 1/2 the the minimum spacing between locations in +# either histogram after normalising to unit variance. However, you can # specifiy your own fixed step using the optional "step_size" parameter. 
-# Note that this step size is applied to the histograms after they have been +# Note that this step size is applied to the histograms after they have been # normalised to unit variance # Convert to matrix for input to dendrogram method @@ -56,25 +57,27 @@ netemd_mat ``` ```{r} -cex=1 -title = paste("NetEMD: max graphlet size = ", 4, sep = "") -plot(phangorn::upgma(as.dist(netemd_mat), method="average"), use.edge.length=FALSE, - edge.width=cex*2, main=title, cex.lab=cex, cex.axis=cex, cex.main=cex, - cex.sub=cex, cex=cex) +cex <- 1 +title <- paste("NetEMD: max graphlet size = ", 4, sep = "") +plot(phangorn::upgma(as.dist(netemd_mat), method = "average"), + use.edge.length = FALSE, + edge.width = cex * 2, main = title, cex.lab = cex, cex.axis = cex, cex.main = cex, + cex.sub = cex, cex = cex +) -# The gdd_for_all_graphs and netemd_many_to_many functions will run in +# The gdd_for_all_graphs and netemd_many_to_many functions will run in # parallel using multiple threads where supported. The number of threads -# used is determined by the global R option "mc.cores". You can inspect the -# current value of this using options("mc.cores") and set it with +# used is determined by the global R option "mc.cores". You can inspect the +# current value of this using options("mc.cores") and set it with # options("mc.cores" = ). To fully utilise a modern consumer -# processor, this should be set to 2x the number of available processor +# processor, this should be set to 2x the number of available processor # cores as each core supports two threads. 
``` ```{r} -cex=1.5 -col <- colorRampPalette(colors = c("blue","white"))(100) -title = paste("NetEMD: max graphlet size = ", 4, sep = "") +cex <- 1.5 +col <- colorRampPalette(colors = c("blue", "white"))(100) +title <- paste("NetEMD: max graphlet size = ", 4, sep = "") heatmap(netemd_mat, Rowv = NULL, Colv = NULL, col = col, main = title, cexRow = cex, cexCol = cex, symm = TRUE) ``` diff --git a/vignettes/netdis_customisations.Rmd b/vignettes/netdis_customisations.Rmd index 0c1c724a..0040450c 100644 --- a/vignettes/netdis_customisations.Rmd +++ b/vignettes/netdis_customisations.Rmd @@ -29,10 +29,10 @@ min_ego_nodes <- 3 min_ego_edges <- 1 # Reference graph -ref_path <- system.file(file.path("extdata", "random", "ER_1250_10_1"), - package = "netdist") +ref_path <- system.file(file.path("extdata", "random", "ER_1250_10_1"), + package = "netdist" +) ref_graph <- read_simple_graph(ref_path, format = "ncol") - ``` ## Load query graphs @@ -46,11 +46,12 @@ graphs <- read_simple_graphs(source_dir, format = "ncol", pattern = "*") # Calculate netdis statistics results <- netdis_many_to_many(graphs, - ref_graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) + ref_graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) print(results$netdis) print(results$comp_spec) @@ -60,23 +61,23 @@ print(results$comp_spec) ```{r} binning_fn <- purrr::partial(binned_densities_adaptive, - min_counts_per_interval = 10, - num_intervals = 50) + min_counts_per_interval = 10, + num_intervals = 50 +) # Calculate netdis statistics results <- netdis_many_to_many(graphs, - ref_graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges, - binning_fn = binning_fn) + ref_graph, + max_graphlet_size = 
max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges, + binning_fn = binning_fn +) print(results$netdis) print(results$comp_spec) - - ``` ## With Modified Expected Counts: Geometric Poisson @@ -84,17 +85,19 @@ print(results$comp_spec) bin_counts_fn <- density_binned_counts_gp exp_counts_fn <- purrr::partial(netdis_expected_counts, - scale_fn = NULL) + scale_fn = NULL +) # Calculate netdis statistics results <- netdis_many_to_many(graphs, - ref_graph = NULL, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges, - bin_counts_fn = bin_counts_fn, - exp_counts_fn = exp_counts_fn) + ref_graph = NULL, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges, + bin_counts_fn = bin_counts_fn, + exp_counts_fn = exp_counts_fn +) print(results$netdis) print(results$comp_spec) @@ -108,15 +111,16 @@ exp_counts_fn <- netdis_expected_counts # Calculate netdis statistics results <- netdis_many_to_many(graphs, - ref_graph = NULL, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges, - binning_fn = binning_fn, - bin_counts_fn = bin_counts_fn, - exp_counts_fn = exp_counts_fn) + ref_graph = NULL, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges, + binning_fn = binning_fn, + bin_counts_fn = bin_counts_fn, + exp_counts_fn = exp_counts_fn +) print(results$netdis) print(results$comp_spec) -``` \ No newline at end of file +``` diff --git a/vignettes/netdis_pairwise_comparisons.Rmd b/vignettes/netdis_pairwise_comparisons.Rmd index d5809c4d..650454c5 100644 --- a/vignettes/netdis_pairwise_comparisons.Rmd +++ 
b/vignettes/netdis_pairwise_comparisons.Rmd @@ -33,10 +33,10 @@ min_bin_count <- 5 num_bins <- 100 # Reference graph -ref_path <- system.file(file.path("extdata", "random", "ER_1250_10_1"), - package = "netdist") +ref_path <- system.file(file.path("extdata", "random", "ER_1250_10_1"), + package = "netdist" +) ref_graph <- read_simple_graph(ref_path, format = "ncol") - ``` ## Compare two graphs @@ -45,18 +45,21 @@ ref_graph <- read_simple_graph(ref_path, format = "ncol") source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), - format = "ncol") + format = "ncol" +) graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), - format = "ncol") + format = "ncol" +) # Calculate netdis statistics netdis_one_to_one(graph_1, graph_2, - ref_graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) + ref_graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) ``` ## Compare one graph to many other graphs @@ -68,11 +71,12 @@ graphs_compare <- graphs[c("ECL", "HSV-1", "KSHV", "VZV")] # Calculate netdis statistics netdis_one_to_many(graph_1, graphs_compare, - ref_graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) + ref_graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) ``` ## Do pairwise netdis calculations for many graphs @@ -83,12 +87,13 @@ graphs <- read_simple_graphs(source_dir, format = "ncol", pattern = "*") # Calculate netdis statistics results <- netdis_many_to_many(graphs, - ref_graph, - max_graphlet_size = max_graphlet_size, - neighbourhood_size = neighbourhood_size, - 
min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) + ref_graph, + max_graphlet_size = max_graphlet_size, + neighbourhood_size = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) print(results$netdis) print(results$comp_spec) -``` \ No newline at end of file +``` diff --git a/vignettes/quickstart_netdis_2graphs.Rmd b/vignettes/quickstart_netdis_2graphs.Rmd index a55f1d82..87764511 100644 --- a/vignettes/quickstart_netdis_2graphs.Rmd +++ b/vignettes/quickstart_netdis_2graphs.Rmd @@ -23,11 +23,12 @@ source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), - format = "ncol") + format = "ncol" +) graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), - format = "ncol") - + format = "ncol" +) ``` ## Set Netdis parameters @@ -50,15 +51,17 @@ num_bins <- 100 ## Generate ego networks ```{r} # Get ego networks for query graphs and reference graph -ego_1 <- make_named_ego_graph(graph_1, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) - -ego_2 <- make_named_ego_graph(graph_2, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) +ego_1 <- make_named_ego_graph(graph_1, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) + +ego_2 <- make_named_ego_graph(graph_2, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) ``` ## Count graphlets in ego networks @@ -71,65 +74,76 @@ graphlet_counts_2 <- ego_to_graphlet_counts(ego_2, max_graphlet_size = max_graph ## Use a reference graph to calculate expected graphlet counts in ego network density bins ```{r} # Load reference graph -ref_path <- system.file(file.path("extdata", "random", "ER_1250_10_1"), - package = "netdist") +ref_path <- system.file(file.path("extdata", "random", "ER_1250_10_1"), + 
package = "netdist" +) ref_graph <- read_simple_graph(ref_path, format = "ncol") -ego_ref <- make_named_ego_graph(ref_graph, - order = neighbourhood_size, - min_ego_nodes = min_ego_nodes, - min_ego_edges = min_ego_edges) +ego_ref <- make_named_ego_graph(ref_graph, + order = neighbourhood_size, + min_ego_nodes = min_ego_nodes, + min_ego_edges = min_ego_edges +) graphlet_counts_ref <- ego_to_graphlet_counts(ego_ref, max_graphlet_size = max_graphlet_size) # Scale ego-network graphlet counts by dividing by total number of k-tuples in # ego-network (where k is graphlet size) -scaled_graphlet_counts_ref <- scale_graphlet_counts_ego(graphlet_counts_ref, - max_graphlet_size) +scaled_graphlet_counts_ref <- scale_graphlet_counts_ego( + graphlet_counts_ref, + max_graphlet_size +) # Get ego-network densities densities_ref <- ego_network_density(graphlet_counts_ref) # Adaptively bin ref ego-network densities -binned_densities <- binned_densities_adaptive(densities_ref, - min_counts_per_interval = min_bin_count, - num_intervals = num_bins) +binned_densities <- binned_densities_adaptive(densities_ref, + min_counts_per_interval = min_bin_count, + num_intervals = num_bins +) ref_ego_density_bins <- binned_densities$breaks # Average ref graphlet counts across density bins ref_binned_graphlet_counts <- mean_density_binned_graphlet_counts( - scaled_graphlet_counts_ref, - binned_densities$interval_indexes) - + scaled_graphlet_counts_ref, + binned_densities$interval_indexes +) ``` ## Centre graphlet counts of query graphs based on statistics of reference graph ```{r} # Calculate expected graphlet counts (using ref graph ego network density bins) -exp_graphlet_counts_1 <- netdis_expected_counts(graphlet_counts_1, - ref_ego_density_bins, - ref_binned_graphlet_counts, - max_graphlet_size, - scale_fn=count_graphlet_tuples) +exp_graphlet_counts_1 <- netdis_expected_counts(graphlet_counts_1, + ref_ego_density_bins, + ref_binned_graphlet_counts, + max_graphlet_size, + scale_fn = 
count_graphlet_tuples +) -exp_graphlet_counts_2 <- netdis_expected_counts(graphlet_counts_2, - ref_ego_density_bins, - ref_binned_graphlet_counts, - max_graphlet_size, - scale_fn=count_graphlet_tuples) +exp_graphlet_counts_2 <- netdis_expected_counts(graphlet_counts_2, + ref_ego_density_bins, + ref_binned_graphlet_counts, + max_graphlet_size, + scale_fn = count_graphlet_tuples +) # Centre graphlet counts by subtracting expected counts -centred_graphlet_counts_1 <- netdis_subtract_exp_counts(graphlet_counts_1, - exp_graphlet_counts_1, - max_graphlet_size) - -centred_graphlet_counts_2 <- netdis_subtract_exp_counts(graphlet_counts_2, - exp_graphlet_counts_2, - max_graphlet_size) +centred_graphlet_counts_1 <- netdis_subtract_exp_counts( + graphlet_counts_1, + exp_graphlet_counts_1, + max_graphlet_size +) + +centred_graphlet_counts_2 <- netdis_subtract_exp_counts( + graphlet_counts_2, + exp_graphlet_counts_2, + max_graphlet_size +) ``` ## Sum centred graphlet counts across all ego networks @@ -142,9 +156,11 @@ sum_graphlet_counts_2 <- colSums(centred_graphlet_counts_2) ## Calculate netdis statistics ```{r} -netdis_result <- netdis_uptok(sum_graphlet_counts_1, - sum_graphlet_counts_2, - max_graphlet_size) +netdis_result <- netdis_uptok( + sum_graphlet_counts_1, + sum_graphlet_counts_2, + max_graphlet_size +) print(netdis_result) ``` From f11f06837809b789f8f5c1a4a0183a6387a51b92 Mon Sep 17 00:00:00 2001 From: Oliver Strickson Date: Tue, 7 Jun 2022 12:54:54 +0100 Subject: [PATCH 66/84] Regenerated NAMESPACE and RcppExports.R after devtools::document() --- NAMESPACE | 10 ++-------- R/RcppExports.R | 10 ++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e0d5d89d..85e26932 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,8 @@ # Generated by roxygen2: do not edit by hand -export(NetEmdSmoothV2) export(NetEmdSmooth) +export(NetEmdSmoothV2) +export(NetEmdSmoothV2_old) export(adaptive_breaks) export(area_between_dhist_ecmfs) 
export(as_smoothed_dhist) @@ -50,10 +51,7 @@ export(min_emd) export(min_emd_exhaustive) export(min_emd_optimise) export(min_emd_optimise_fast) -export(netEMDSpeedTest) export(netEMDSpeedTestSmooth) -export(net_emd) -export(net_emds_for_all_graphs) export(netdis) export(netdis.plot) export(netdis_centred_graphlet_counts) @@ -79,9 +77,5 @@ export(shift_dhist) export(simplify_graph) export(single_density_bin) export(sort_dhist) -export(zeros_to_ones) -import(Rcpp) -importFrom(Rcpp,evalCpp) importFrom(Rcpp,sourceCpp) useDynLib(netdist, .registration=TRUE) -exportPattern("^[[:alpha:]]+") diff --git a/R/RcppExports.R b/R/RcppExports.R index e718b542..38e022c2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -45,3 +45,13 @@ NetEmdSmoothV2 <- function(loc1, val1, binWidth1, loc2, val2, binWidth2) { .Call(`_netdist_NetEmdSmoothV2`, loc1, val1, binWidth1, loc2, val2, binWidth2) } +#' @title +#' Compute EMD +NULL + +#' +#' @export +NetEmdSmoothV2_old <- function(loc1, val1, binWidth1, loc2, val2, binWidth2) { + .Call(`_netdist_NetEmdSmoothV2_old`, loc1, val1, binWidth1, loc2, val2, binWidth2) +} + From f31de5b5ec3eef8ac29b09a25cae123bfb5cd81e Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Tue, 7 Jun 2022 13:24:53 +0100 Subject: [PATCH 67/84] vignette line length fixes --- .github/workflows/build.yml | 2 +- R/data.R | 38 ++++++++--- R/dhist.R | 6 +- vignettes/ManyToMany.Rmd | 76 ++++++++++++++++++---- vignettes/NetEmdTimeOrdering.Rmd | 26 ++++++-- vignettes/NetdisGPStepByStep.Rmd | 11 +++- vignettes/NetdisStepByStep.Rmd | 23 +++++-- vignettes/NewNetdisCustomisations.Rmd | 41 +++++++++--- vignettes/PreComputedProps.Rmd | 82 ++++++++++++++++++------ vignettes/default_pairwise_usage.Rmd | 64 +++++++++++++----- vignettes/dendrogram_example_net_dis.Rmd | 21 ++++-- vignettes/dendrogram_example_net_emd.Rmd | 23 +++++-- vignettes/quickstart_netdis_2graphs.Rmd | 15 ++++- 13 files changed, 329 insertions(+), 99 deletions(-) diff --git a/.github/workflows/build.yml 
b/.github/workflows/build.yml index 160d59eb..1c1ff79a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -61,7 +61,7 @@ jobs: git config --local user.name "$GITHUB_ACTOR" git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" git add man/\* NAMESPACE - git commit -m "Update documentation" || echo "No changes to commit" + git commit -m "Update documentation (bot)" || echo "No changes to commit" git pull --ff-only git push origin diff --git a/R/data.R b/R/data.R index 4753ed66..688cee76 100644 --- a/R/data.R +++ b/R/data.R @@ -36,9 +36,17 @@ #' } #' #' @format A list of \code{igraph} objects. -#' @source \strong{PPI data (EBV, HSV-1, KSHV, VZV):} Fossum E, Friedel CC, Rajagopala SV, Titz B, Baiker A, Schmidt T, et al. (2009) Evolutionarily Conserved Herpesviral Protein Interaction Networks. PLoS Pathog 5(9): e1000570. \url{https://doi.org/10.1371/journal.ppat.1000570}. Data from Table S2 in the supporting information. -#' @source \strong{PPI data (ECL):} Peregrín-Alvarez JM, Xiong X, Su C, Parkinson J (2009) The Modular Organization of Protein Interactions in Escherichia coli. PLoS Comput Biol 5(10): e1000523. \url{https://doi.org/10.1371/journal.pcbi.1000523} -#' @source \strong{Taxonomy ground truth:} NCBI taxonomy database. \url{https://www.ncbi.nlm.nih.gov/taxonomy} +#' @source \strong{PPI data (EBV, HSV-1, KSHV, VZV):} Fossum E, Friedel CC, +#' Rajagopala SV, Titz B, Baiker A, Schmidt T, et al. (2009) Evolutionarily +#' Conserved Herpesviral Protein Interaction Networks. PLoS Pathog 5(9): +#' e1000570. \url{https://doi.org/10.1371/journal.ppat.1000570}. Data from Table +#' S2 in the supporting information. +#' @source \strong{PPI data (ECL):} Peregrín-Alvarez JM, Xiong X, Su C, +#' Parkinson J (2009) The Modular Organization of Protein Interactions in +#' Escherichia coli. PLoS Comput Biol 5(10): e1000523. 
+#' \url{https://doi.org/10.1371/journal.pcbi.1000523} +#' @source \strong{Taxonomy ground truth:} NCBI taxonomy database. +#' \url{https://www.ncbi.nlm.nih.gov/taxonomy} #' @encoding UTF-8 "virusppi" @@ -48,16 +56,28 @@ #' World trade networks from 1985–2014 #' -#' The world trade data set consists of a small sample of world trade networks for the years 2001-2014, and pre-computed subgraph counts of a larger set of world trade networks (1985–2014). The world trade networks are based on the data set from [Feenstra et al., 2005] for the years 1962- 2000 and on the United Nations division COMTRADE [Division, 2015] for the years 2001-2014. +#' The world trade data set consists of a small sample of world trade networks +#' for the years 2001-2014, and pre-computed subgraph counts of a larger set of +#' world trade networks (1985–2014). The world trade networks are based on the +#' data set from [Feenstra et al., 2005] for the years 1962- 2000 and on the +#' United Nations division COMTRADE [Division, 2015] for the years 2001-2014. #' #' \itemize{ -#' \item wtnets: List of \code{igraph} objects providing the world trade networks from 2001–2014. -#' \item Counts: Pre-computed graphlet counts for the world trade networks in the years 1985-2014. +#' \item wtnets: List of \code{igraph} objects providing the world trade +#' networks from 2001–2014. +#' \item Counts: Pre-computed graphlet counts for the world trade networks in +#' the years 1985-2014. #' } #' -#' @format A list of two elements. The first element, 'wtnets', is a list of \code{igraph} objects providing a small sample of world trade networks from 2001–2014. The second element, 'Counts', is a list of pre-computed subgraph counts of world trade networks in the years 1985-2014. -#' @source \strong{World trade networks:}. United nations commodity trade statistics database (UN comtrade). http://comtrade.un.org/, 2015. -#' @source \strong{Subgraph Counts:} Feenstra RC,Lipsey RE, Deng H, Ma AC, and Mo H. 
(2005) World trade flows: 1962-2000. Technical report, National Bureau of Economic Research. (See also https://cid.econ.ucdavis.edu/wix.html). +#' @format A list of two elements. The first element, 'wtnets', is a list of +#' \code{igraph} objects providing a small sample of world trade networks from +#' 2001–2014. The second element, 'Counts', is a list of pre-computed subgraph +#' counts of world trade networks in the years 1985-2014. +#' @source \strong{World trade networks:}. United nations commodity trade +#' statistics database (UN comtrade). http://comtrade.un.org/, 2015. +#' @source \strong{Subgraph Counts:} Feenstra RC,Lipsey RE, Deng H, Ma AC, and +#' Mo H. (2005) World trade flows: 1962-2000. Technical report, National Bureau +#' of Economic Research. (See also https://cid.econ.ucdavis.edu/wix.html). #' #' @encoding UTF-8 "worldtradesub" diff --git a/R/dhist.R b/R/dhist.R index 0ff24b9a..fe81d357 100644 --- a/R/dhist.R +++ b/R/dhist.R @@ -491,7 +491,8 @@ dhist_variance <- function(dhist) { bin_uppers <- mean_centred_locations + hw # See comment in issue #21 on Github repository for verification that E[X^2] # is calculated as below for a uniform bin - bin_x2_integrals <- (bin_lowers^2 + bin_uppers^2 + bin_lowers * bin_uppers) / 3 + bin_x2_integrals <- (bin_lowers^2 + bin_uppers^2 + + bin_lowers * bin_uppers) / 3 variance <- sum(dhist$masses * bin_x2_integrals) / sum(dhist$masses) } return(variance) @@ -563,7 +564,8 @@ normalise_dhist_variance <- function(dhist) { # If smoothing_window_width not zero, then update it to reflect the variance # normalisation if (dhist$smoothing_window_width != 0) { - normalised_smoothing_window_width <- dhist$smoothing_window_width / std_dev + normalised_smoothing_window_width <- + dhist$smoothing_window_width / std_dev dhist <- update_dhist( dhist, smoothing_window_width = normalised_smoothing_window_width diff --git a/vignettes/ManyToMany.Rmd b/vignettes/ManyToMany.Rmd index 89dc2ea5..c69e9f21 100644 --- 
a/vignettes/ManyToMany.Rmd +++ b/vignettes/ManyToMany.Rmd @@ -67,9 +67,23 @@ plot(gTree_2, vertex.size = 0.8, vertex.label = NA) Subgraph count based NetEmd comparisons: ```{r, netemdorbits,fig.align='center'} # NetEMD using subgraph counts. -glist <- list(Lat_1 = gLat_1, Lat_2 = gLat_2, Ring_1 = gRing_1, Ring_2 = gRing_1, Tree_1 = gTree_1, Tree_2 = gTree_2) +glist <- list( + Lat_1 = gLat_1, + Lat_2 = gLat_2, + Ring_1 = gRing_1, + Ring_2 = gRing_1, + Tree_1 = gTree_1, + Tree_2 = gTree_2 +) -netemdlist <- netemd_many_to_many(graphs = glist, smoothing_window_width = 1, mc.cores = 1) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +# Use of smoothing window 1 is given for discrete integer distributions. If the +# network features are considered continuous variables smoothing_window_width +# equal to zero is recommended. +netemdlist <- netemd_many_to_many( + graphs = glist, + smoothing_window_width = 1, + mc.cores = 1 +) netemdlist ``` @@ -77,13 +91,19 @@ netemdlist To read the results in a matrix form: ```{r,fig.align='center'} # Creating a comparison matrix: -mat <- cross_comp_to_matrix(measure = netemdlist$netemds, cross_comparison_spec = netemdlist$comp_spec) +mat <- cross_comp_to_matrix( + measure = netemdlist$netemds, cross_comparison_spec = netemdlist$comp_spec +) mat ``` Illustration of the multiple NetEmd comparisons based on subgraph counts. ```{r,netemdorbitsPLOT,fig.align='center',fig.dim=c(8,8)} -netemd.plot(netemdlist = netemdlist, clustering_method = "ward.D", main = "NetEmd subgraph counts") +netemd.plot( + netemdlist = netemdlist, + clustering_method = "ward.D", + main = "NetEmd subgraph counts" +) ``` ## NetEmd using the Laplacian and Normalized Laplacian Spectrum @@ -95,16 +115,25 @@ SPECT <- list() # This step may take several minutes. 
for (i in 1:length(glist)) { - Lapg <- igraph::laplacian_matrix(graph = glist[[i]], normalized = FALSE, sparse = FALSE) - NLap <- igraph::laplacian_matrix(graph = glist[[i]], normalized = TRUE, sparse = FALSE) - SPECT[[names(glist)[i]]] <- cbind(L.Spectra = eigen(Lapg)$values, NL.Spectra = eigen(NLap)$values) + Lapg <- igraph::laplacian_matrix( + graph = glist[[i]], normalized = FALSE, sparse = FALSE + ) + NLap <- igraph::laplacian_matrix( + graph = glist[[i]], normalized = TRUE, sparse = FALSE + ) + SPECT[[names(glist)[i]]] <- cbind( + L.Spectra = eigen(Lapg)$values, NL.Spectra = eigen(NLap)$values + ) } str(SPECT) ``` Compute NetEmd: ```{r} -netemdlist <- netemd_many_to_many(dhists = SPECT, smoothing_window_width = 0) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +# Use of smoothing window 1 is given for discrete integer distributions. If the +# network features are considered continuous variables smoothing +# window_width equal to zero is recommended. +netemdlist <- netemd_many_to_many(dhists = SPECT, smoothing_window_width = 0) netemdlist ``` @@ -112,7 +141,9 @@ netemdlist ### Illustration of the multiple NetEmd comparisons based on the Laplacian and Normalized Laplacian spectra ```{r,netemdspectrumPLOT ,fig.align='center',fig.dim=c(8,8)} -netemd.plot(netemdlist = netemdlist, clustering_method = "ward.D", main = "NetEmd Spectra") +netemd.plot( + netemdlist = netemdlist, clustering_method = "ward.D", main = "NetEmd Spectra" +) ``` @@ -154,7 +185,14 @@ plot(gst_2, vertex.size = 0.8, vertex.label = NA) Obtain the comparison via Netdis using each of the reference graph networks. 
```{r,netdisgoldstand ,fig.align='center'} -glist <- list(Lat_1 = gLat_1, Lat_2 = gLat_2, Ring_1 = gRing_1, Ring_2 = gRing_1, Tree_1 = gTree_1, Tree_2 = gTree_2) +glist <- list( + Lat_1 = gLat_1, + Lat_2 = gLat_2, + Ring_1 = gRing_1, + Ring_2 = gRing_1, + Tree_1 = gTree_1, + Tree_2 = gTree_2 +) # Netdis using the goldstd_1 graph as gold-standard reference point netdis_mat_gst1 <- netdis_many_to_many(graphs = glist, ref_graph = gst_1) @@ -170,18 +208,28 @@ netdis_mat_gst2 To read the results in a matrix form: ```{r,fig.align='center'} # Creating a comparison matrix: -cross_comp_to_matrix(measure = netdis_mat_gst1$netdis, cross_comparison_spec = netdis_mat_gst1$comp_spec) +cross_comp_to_matrix( + measure = netdis_mat_gst1$netdis, + cross_comparison_spec = netdis_mat_gst1$comp_spec +) -cross_comp_to_matrix(measure = netdis_mat_gst2$netdis, cross_comparison_spec = netdis_mat_gst2$comp_spec) +cross_comp_to_matrix( + measure = netdis_mat_gst2$netdis, + cross_comparison_spec = netdis_mat_gst2$comp_spec +) ``` Heatmap of the Netdis comparisons: ```{r,netdisgoldstandPLOT ,fig.align='center',fig.dim=c(8,8)} # Network comparisons heatmap with Gold-Standard 1 -netdis.plot(netdislist = netdis_mat_gst1, whatrow = 2, main = "Netdis GoldStd-1") +netdis.plot( + netdislist = netdis_mat_gst1, whatrow = 2, main = "Netdis GoldStd-1" +) # Network comparisons heatmap with Gold-Standard 2 -netdis.plot(netdislist = netdis_mat_gst2, whatrow = 2, main = "Netdis GoldStd-2") +netdis.plot( + netdislist = netdis_mat_gst2, whatrow = 2, main = "Netdis GoldStd-2" +) ``` diff --git a/vignettes/NetEmdTimeOrdering.Rmd b/vignettes/NetEmdTimeOrdering.Rmd index 2a199a41..44a48e7a 100644 --- a/vignettes/NetEmdTimeOrdering.Rmd +++ b/vignettes/NetEmdTimeOrdering.Rmd @@ -47,8 +47,8 @@ plot(wtnets$wtn2001, vertex.size = 5, vertex.label.cex = 0.4) In this example **NetEmd** will consider orbit counts of subgraphs containing up to 5 nodes. 
If NetEmd is to be called a single time, then the command `netemd_many_to_many(graphs = wtnets)` would suffice. The following code provides such an example: ```{r} -# As the trade networks are considerable dense, this example first considers a small number of networks. -# This example may take some minutes to run. +# As the trade networks are considerable dense, this example first considers a +# small number of networks. This example may take some minutes to run. netemd_result <- netemd_many_to_many(graphs = wtnets[1:4], mc.cores = 1) print(netemd_result) @@ -60,11 +60,15 @@ However, if there are pre-computed counts or features NetEmd can be called via t World trade networks consist of relatively dense networks, thus leading to longer computational times for the calculation of the subgraph counts. Hence, it is advisable to pre-compute counts in case there is a need to call NetEmd multiple times. This may, for example, be the case when adding a new network to the data set. The following illustrates the extraction of subgraph counts for the small network sample. ```{r} -# This example may take more than a few minutes to run (approx. 20 mins) , and it is not necessary to run it for the upcoming examples as a larger set of counts has been already computed. -if (FALSE) { # It is not necessary to run, as these counts are already available in. +# This example may take more than a few minutes to run (approx. 20 mins), and +# it is not necessary to run it for the upcoming examples as a larger set of +# counts has been already computed. 
+if (FALSE) { Counts <- list() for (i in 1:length(wtnets)) { - Counts[[names(wtnets)[i]]] <- count_orbits_per_node(graph = wtnets[[i]], max_graphlet_size = 5) + Counts[[names(wtnets)[i]]] <- count_orbits_per_node( + graph = wtnets[[i]], max_graphlet_size = 5 + ) } } ``` @@ -78,7 +82,10 @@ Counts <- worldtradesub$Counts netemd_result <- netemd_many_to_many(dhists = Counts, mc.cores = 1) # Results -netemd_matrix <- cross_comp_to_matrix(measure = netemd_result$netemds, cross_comparison_spec = netemd_result$comp_spec) +netemd_matrix <- cross_comp_to_matrix( + measure = netemd_result$netemds, + cross_comparison_spec = netemd_result$comp_spec +) print(netemd_matrix[1:10, 1:5]) ``` @@ -88,7 +95,12 @@ print(netemd_matrix[1:10, 1:5]) Based on the comparison of the world trade networks across the years, we can identify periods of time where possible considerable changes in world trade have occurred. The following heat map clearly shows the existence of two potential changes in the world trade system, and which correspond to 1995-1996 and 2010-2011. ```{r,fig.align='center',fig.dim=c(8.5,8.5)} -netemd.plot(netemdlist = netemd_result, clustering_method = "ward.D", main = "NetEmd", docluster = FALSE) +netemd.plot( + netemdlist = netemd_result, + clustering_method = "ward.D", + main = "NetEmd", + docluster = FALSE +) ``` The World Trade Organization (WTO) said the following about these years: diff --git a/vignettes/NetdisGPStepByStep.Rmd b/vignettes/NetdisGPStepByStep.Rmd index 988efe97..c5351bcc 100644 --- a/vignettes/NetdisGPStepByStep.Rmd +++ b/vignettes/NetdisGPStepByStep.Rmd @@ -116,7 +116,8 @@ neighbourhood_size <- 2 min_ego_nodes <- 3 min_ego_edges <- 1 -# Ego-network density binning parameters. Here, the minimum number of ego-networks per bin and the starting number of bins +# Ego-network density binning parameters. 
Here, the minimum number of +# ego-networks per bin and the starting number of bins min_bin_count <- 5 num_bins <- 100 ``` @@ -145,8 +146,12 @@ head(ego_2, n = 2) Once the ego-networks are extracted, the subgraph counts for all ego-network are obtained for each network being compared: ```{r} # Subgraphs counts for ego-networks in query graphs -subgraph_counts_1 <- ego_to_graphlet_counts(ego_networks = ego_1, max_graphlet_size = max_subgraph_size) -subgraph_counts_2 <- ego_to_graphlet_counts(ego_networks = ego_2, max_graphlet_size = max_subgraph_size) +subgraph_counts_1 <- ego_to_graphlet_counts( + ego_networks = ego_1, max_graphlet_size = max_subgraph_size +) +subgraph_counts_2 <- ego_to_graphlet_counts( + ego_networks = ego_2, max_graphlet_size = max_subgraph_size +) head(subgraph_counts_1) head(subgraph_counts_2) diff --git a/vignettes/NetdisStepByStep.Rmd b/vignettes/NetdisStepByStep.Rmd index 7162f142..0499acad 100644 --- a/vignettes/NetdisStepByStep.Rmd +++ b/vignettes/NetdisStepByStep.Rmd @@ -104,7 +104,8 @@ neighbourhood_size <- 2 min_ego_nodes <- 3 min_ego_edges <- 1 -# Ego-network density binning parameters. Here, the minimum number of ego-networks per bin and the starting number of bins +# Ego-network density binning parameters. 
Here, the minimum number of +# ego-networks per bin and the starting number of bins min_bin_count <- 5 num_bins <- 100 ``` @@ -135,8 +136,12 @@ tail(ego_2, n = 2) Once the ego-networks are extracted, the subgraph counts for all ego-network are obtained for each network being compared: ```{r} # Subgraphs counts for ego-networks in query graphs -subgraph_counts_1 <- ego_to_graphlet_counts(ego_networks = ego_1, max_graphlet_size = max_subgraph_size) -subgraph_counts_2 <- ego_to_graphlet_counts(ego_networks = ego_2, max_graphlet_size = max_subgraph_size) +subgraph_counts_1 <- ego_to_graphlet_counts( + ego_networks = ego_1, max_graphlet_size = max_subgraph_size +) +subgraph_counts_2 <- ego_to_graphlet_counts( + ego_networks = ego_2, max_graphlet_size = max_subgraph_size +) tail(subgraph_counts_1) tail(subgraph_counts_2) @@ -203,8 +208,8 @@ str(binned_densities_gst_1) $E_w$ is estimated based on the average subgraph counts of ego-networks per density bin for each given subgraph. However, as the query networks and the gold-standard may have different number of nodes, the counts of the gold-standard network are first scaled to a "standard" or "canonical" scale from which they can be scaled back towards networks of different sizes. 
The following code first shows the computation of the subgraph counts for the ego-networks in the gold-standard network with their corresponding scaling: ```{r} -# Scale ego-network subgraph counts by dividing by the total number of k-tuples in the -# ego-network (where k is the subgraph size) +# Scale ego-network subgraph counts by dividing by the total number of k-tuples +# in the ego-network (where k is the subgraph size) scaled_subgraph_counts_ref <- scale_graphlet_counts_ego( graphlet_counts = subgraph_counts_gst_1, max_graphlet_size = max_subgraph_size @@ -215,7 +220,10 @@ str(scaled_subgraph_counts_ref) Finally, the standard or canonical $E_w$ can be obtained by taking the average per bin of the scaled subgraph counts: ```{r} # Average of the scaled reference subgraph counts in each density bin -ref_binned_canonical_subgraph_counts <- mean_density_binned_graphlet_counts(graphlet_counts = scaled_subgraph_counts_ref, density_interval_indexes = binned_densities_gst_1$interval_indexes) +ref_binned_canonical_subgraph_counts <- mean_density_binned_graphlet_counts( + graphlet_counts = scaled_subgraph_counts_ref, + density_interval_indexes = binned_densities_gst_1$interval_indexes +) ref_binned_canonical_subgraph_counts ``` @@ -224,7 +232,8 @@ ref_binned_canonical_subgraph_counts After obtaining the average scaled subgraph counts per density bin, the subgraph counts of the query networks can be centred: ```{r} -# Scale the reference counts of the gold-standard network to the sizes of each of the query ego-networks. +# Scale the reference counts of the gold-standard network to the sizes of each +# of the query ego-networks. 
exp_subgraph_counts_1 <- netdis_expected_counts( graphlet_counts = subgraph_counts_1, density_breaks = binned_densities_gst_1$breaks, diff --git a/vignettes/NewNetdisCustomisations.Rmd b/vignettes/NewNetdisCustomisations.Rmd index e8cbe645..249c37d3 100644 --- a/vignettes/NewNetdisCustomisations.Rmd +++ b/vignettes/NewNetdisCustomisations.Rmd @@ -50,7 +50,14 @@ gRing_2 <- make_ring(40^2) gTree_1 <- igraph::as.undirected(make_tree(n = 20^2, children = 3)) gTree_2 <- igraph::as.undirected(make_tree(n = 40^2, children = 3)) -glist <- list(Lat_1 = gLat_1, Lat_2 = gLat_2, Ring_1 = gRing_1, Ring_2 = gRing_1, Tree_1 = gTree_1, Tree_2 = gTree_2) +glist <- list( + Lat_1 = gLat_1, + Lat_2 = gLat_2, + Ring_1 = gRing_1, + Ring_2 = gRing_1, + Tree_1 = gTree_1, + Tree_2 = gTree_2 +) # Create a random graph to be used as a gold-standard gst <- igraph::as.undirected(graph.star(1000)) @@ -65,7 +72,10 @@ netdis_mat_gst <- netdis_many_to_many( graphs = glist, ref_graph = gst ) -netdis.plot(netdislist = netdis_mat_gst, whatrow = 2, main = "Netdis with reference graph") +netdis.plot( + netdislist = netdis_mat_gst, whatrow = 2, + main = "Netdis with reference graph" +) ``` ## Using Netdis with a constant valued expectation, $E_w=k$ @@ -107,7 +117,9 @@ mybinning <- function(densities) { min_counts_per_interval <- 5 num_intervals <- 3 # - if (length(densities) < min_counts_per_interval) min_counts_per_interval <- length(densities) + if (length(densities) < min_counts_per_interval) { + min_counts_per_interval <- length(densities) + } breaks <- adaptive_breaks(densities, min_count = min_counts_per_interval, breaks = num_intervals @@ -136,7 +148,11 @@ netdisgp_mat_mybin <- netdis_many_to_many( binning_fn = mybinning ) -netdis.plot(netdislist = netdisgp_mat_mybin, whatrow = 2, main = "Netdis-GP with mybinning") +netdis.plot( + netdislist = netdisgp_mat_mybin, + whatrow = 2, + main = "Netdis-GP with mybinning" +) ``` Note that whenever $E_w$ is taken as a constant value, then the binning will 
not have an effect on the computation of Netdis. @@ -167,8 +183,9 @@ num_bins <- 100 These values can be directly imputed and changed into the shortcut Netdis function calls. However, not all combinations may be possible. The following shows the use of subgraphs up to size 4, with 3-step ego-networks and where only ego-networks with at least 5 nodes and 4 edges can be considered. Furthermore, the binning of the ego-networks will be sett to start with 20 bins and each bin will be required to have at least 20 elements. ```{r,fig.align='center',fig.dim=c(7,7)} -# (We only recommend changing these default values for those users that have a clear understanding of graph theory behind it) -# (change values with care as not all combinations may be possible). +# (We only recommend changing these default values for those users that have a +# clear understanding of graph theory behind it. Change values with care as not +# all combinations may be possible). # Defining a new binning function: binning_fn <- purrr::partial(binned_densities_adaptive, @@ -205,8 +222,16 @@ netdisgp_mat <- netdis_many_to_many( min_ego_edges = min_ego_edges, binning_fn = binning_fn ) -netdis.plot(netdislist = netdisgp_mat, whatrow = 2, main = "Netdis-GP: Default parameter values") -netdis.plot(netdislist = netdisgp_mat_custom, whatrow = 2, main = "Netdis-GP: illustrative parameter changes") +netdis.plot( + netdislist = netdisgp_mat, + whatrow = 2, + main = "Netdis-GP: Default parameter values" +) +netdis.plot( + netdislist = netdisgp_mat_custom, + whatrow = 2, + main = "Netdis-GP: illustrative parameter changes" +) ``` diff --git a/vignettes/PreComputedProps.Rmd b/vignettes/PreComputedProps.Rmd index 564c9b1c..8bc8a8d8 100644 --- a/vignettes/PreComputedProps.Rmd +++ b/vignettes/PreComputedProps.Rmd @@ -51,7 +51,12 @@ plot(gLat_2, vertex.size = 4, vertex.label = NA) The simple computation of NetEmd without pre-computed features: ```{r} -netemd_one_to_one(graph_1 = gLat_1, graph_2 = gLat_2, feature_type 
= "orbit", smoothing_window_width = 1) +netemd_one_to_one( + graph_1 = gLat_1, + graph_2 = gLat_2, + feature_type = "orbit", + smoothing_window_width = 1 +) ``` @@ -61,7 +66,9 @@ counts_1 <- count_orbits_per_node(graph = gLat_1, max_graphlet_size = 5) counts_2 <- count_orbits_per_node(graph = gLat_2, max_graphlet_size = 5) head(counts_1[, 1:4]) -netemd_one_to_one(dhists_1 = counts_1, dhists_2 = counts_2, smoothing_window_width = 1) +netemd_one_to_one( + dhists_1 = counts_1, dhists_2 = counts_2, smoothing_window_width = 1 +) ``` ### Providing the network features as lists of dhist objects @@ -69,7 +76,9 @@ netemd_one_to_one(dhists_1 = counts_1, dhists_2 = counts_2, smoothing_window_wid dhists_1 <- graph_features_to_histograms(features_matrix = counts_1) dhists_2 <- graph_features_to_histograms(features_matrix = counts_2) -netemd_one_to_one(dhists_1 = dhists_1, dhists_2 = dhists_2, smoothing_window_width = 1) +netemd_one_to_one( + dhists_1 = dhists_1, dhists_2 = dhists_2, smoothing_window_width = 1 +) ``` ___ @@ -82,31 +91,48 @@ gLat_1 <- graph.lattice(c(20, 20)) gLat_2 <- graph.lattice(c(44, 44)) # Laplacian -Lapg_1 <- igraph::laplacian_matrix(graph = gLat_1, normalized = FALSE, sparse = FALSE) -Lapg_2 <- igraph::laplacian_matrix(graph = gLat_2, normalized = FALSE, sparse = FALSE) +Lapg_1 <- igraph::laplacian_matrix( + graph = gLat_1, normalized = FALSE, sparse = FALSE +) +Lapg_2 <- igraph::laplacian_matrix( + graph = gLat_2, normalized = FALSE, sparse = FALSE +) # Normalized Laplacian -NLapg_1 <- igraph::laplacian_matrix(graph = gLat_1, normalized = TRUE, sparse = FALSE) -NLapg_2 <- igraph::laplacian_matrix(graph = gLat_2, normalized = TRUE, sparse = FALSE) +NLapg_1 <- igraph::laplacian_matrix( + graph = gLat_1, normalized = TRUE, sparse = FALSE +) +NLapg_2 <- igraph::laplacian_matrix( + graph = gLat_2, normalized = TRUE, sparse = FALSE +) -# Providing a matrix of network features (e.g. Spectra). (This may take a couple of minutes). 
-spec_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) -spec_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) +# Providing a matrix of network features (e.g. Spectra). (This may take a +# couple of minutes). +spec_1 <- cbind( + L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values +) +spec_2 <- cbind( + L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values +) head(spec_1) ``` Similarly to counts, all other features can be given as a matrix or as dhist objects: ```{r} -netemd_one_to_one(dhists_1 = spec_1, dhists_2 = spec_2, smoothing_window_width = 0) +netemd_one_to_one( + dhists_1 = spec_1, dhists_2 = spec_2, smoothing_window_width = 0 +) # Providing pre-computed dhist objects from network features dhists_1 <- graph_features_to_histograms(spec_1) dhists_2 <- graph_features_to_histograms(spec_2) -netemd_one_to_one(dhists_1 = dhists_1, dhists_2 = dhists_2, smoothing_window_width = 0) +netemd_one_to_one( + dhists_1 = dhists_1, dhists_2 = dhists_2, smoothing_window_width = 0 +) ``` ------------------------- @@ -120,7 +146,8 @@ Netdis uses counts from the resulting ego-networks of each of the nodes in a gra The selection of a gold-standard graph as a substitute for $E_w$ could be done when such graph is known to be a good proxy for $E_w$, or alternatively as a good reference point for the comparison. This option will focus on detecting discrepancies between the networks relative to the ego-network structure of the reference network / gold-standard graph and which is summarized in $E_w$. ```{r,netdisgoldstand} -# Set source directory for virus protein-protein interaction edge files stored in the netdist package. +# Set source directory for virus protein-protein interaction edge files stored +# in the netdist package. 
source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects @@ -146,8 +173,16 @@ props_goldstd_2 <- count_graphlets_ego(graph = goldstd_2) Compute Netdis using the pre-computed counts and any of the example gold-standard networks. ```{r} -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_1) -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, graphlet_counts_ref = props_goldstd_2) +netdis_one_to_one( + graphlet_counts_1 = props_1, + graphlet_counts_2 = props_2, + graphlet_counts_ref = props_goldstd_1 +) +netdis_one_to_one( + graphlet_counts_1 = props_1, + graphlet_counts_2 = props_2, + graphlet_counts_ref = props_goldstd_2 +) ``` Comparison to the result of Netdis without pre-computed counts. @@ -165,21 +200,25 @@ netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = goldstd_2) This Netdis variant focuses on detecting more meso-level discrepancies between the ego-network structures. ```{r, netdisGP} -# Set source directory for Virus protein-protein interaction edge files stored in the netdist package. +# Set source directory for Virus protein-protein interaction edge files stored +# in the netdist package. source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol") graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol") -# Netdis using the Geometric-Poisson approximation as a way to obtain background expectations. +# Netdis using the Geometric-Poisson approximation as a way to obtain background +# expectations. netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) # Providing pre-calculated subgraph counts. 
props_1 <- count_graphlets_ego(graph = graph_1) props_2 <- count_graphlets_ego(graph = graph_2) -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = NULL) +netdis_one_to_one( + graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = NULL +) ``` @@ -188,7 +227,8 @@ Comparing the networks via their observed ego counts without centring them, (equ ```{r,netdiszero} -# Set source directory for Virus protein-protein interaction edge files stored in the netdist package. +# Set source directory for Virus protein-protein interaction edge files stored +# in the netdist package. source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects @@ -202,7 +242,9 @@ netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = 0) props_1 <- count_graphlets_ego(graph = graph_1) props_2 <- count_graphlets_ego(graph = graph_2) -netdis_one_to_one(graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = 0) +netdis_one_to_one( + graphlet_counts_1 = props_1, graphlet_counts_2 = props_2, ref_graph = 0 +) ``` ------------------------- diff --git a/vignettes/default_pairwise_usage.Rmd b/vignettes/default_pairwise_usage.Rmd index 6041a04d..67683313 100644 --- a/vignettes/default_pairwise_usage.Rmd +++ b/vignettes/default_pairwise_usage.Rmd @@ -37,10 +37,12 @@ The `netdist` package also includes examples of a few real networks. These are p Although the `virusppi` list of PPI networks is loaded along with the `netdist` package, the following code shows how to read a network data from a file in disk: ```{r, graphs,fig.align='center',fig.dim=c(6,6)} -# Set source directory for Virus protein-protein interaction edge files stored in the netdist package. +# Set source directory for Virus protein-protein interaction edge files stored +# in the netdist package. 
source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") -# Load query graphs as undirected igraph objects, with no loops, multiple edges or degree zero nodes. +# Load query graphs as undirected igraph objects, with no loops, multiple edges +# or degree zero nodes. graph_1 <- read_simple_graph( file = file.path(source_dir, "EBV.txt"), format = "ncol" @@ -51,12 +53,14 @@ graph_2 <- read_simple_graph( format = "ncol" ) -# Herpes virus EBV protein-protein interaction graph with 60 nodes and 208 edges. +# Herpes virus EBV protein-protein interaction graph with 60 nodes +# and 208 edges. graph_1 # Note this graph is the same as # virusppi$EBV -# Herpes virus ECL protein-protein interaction graph with 1941 nodes and 3989 edges. +# Herpes virus ECL protein-protein interaction graph with 1941 nodes +# and 3989 edges. graph_2 # Note this graph is the same as # virusppi$ECL @@ -95,42 +99,69 @@ and where $p_{t_i}(G)$ and $p_{t_i}(G')$ are the distributions of ${t_i}$ on $G$ ## Comparing two graphs with NetEmd ```{r, netemd,fig.align='center'} -# Set source directory for Virus protein-protein interaction network edge files stored in the netdist package. +# Set source directory for Virus protein-protein interaction network edge files +# stored in the netdist package. source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist") # Load query graphs as igraph objects -# Herpes virus EBV protein-protein interaction graph with 60 nodes and 208 edges. +# Herpes virus EBV protein-protein interaction graph with 60 nodes +# and 208 edges. graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol" ) -# Herpes virus ECL protein-protein interaction graph with 1941 nodes and 3989 edges. +# Herpes virus ECL protein-protein interaction graph with 1941 nodes +# and 3989 edges. graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol" ) # One to one NetEmd comparison. 
-netemd_one_to_one(graph_1 = graph_1, graph_2 = graph_2, feature_type = "orbit", smoothing_window_width = 1) # Use of smoothing window 1 is given for discrete integer distributions. If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +# Use of smoothing window 1 is given for discrete integer distributions. If the +# network features are considered continuous variables smoothing_window_width +# equal to zero is recommended. +netemd_one_to_one( + graph_1 = graph_1, + graph_2 = graph_2, + feature_type = "orbit", + smoothing_window_width = 1 +) ``` ## Comparing two graphs with NetEmd via their Laplacian spectrum ```{r, netemdEigen,fig.align='center'} # Laplacian -Lapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = FALSE, sparse = FALSE) -Lapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = FALSE, sparse = FALSE) +Lapg_1 <- igraph::laplacian_matrix( + graph = graph_1, normalized = FALSE, sparse = FALSE +) +Lapg_2 <- igraph::laplacian_matrix( + graph = graph_2, normalized = FALSE, sparse = FALSE +) # Normalized Laplacian -NLapg_1 <- igraph::laplacian_matrix(graph = graph_1, normalized = TRUE, sparse = FALSE) -NLapg_2 <- igraph::laplacian_matrix(graph = graph_2, normalized = TRUE, sparse = FALSE) +NLapg_1 <- igraph::laplacian_matrix( + graph = graph_1, normalized = TRUE, sparse = FALSE +) +NLapg_2 <- igraph::laplacian_matrix( + graph = graph_2, normalized = TRUE, sparse = FALSE +) # Spectra (this may take a couple of minutes). 
-props_1 <- cbind(L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values) -props_2 <- cbind(L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values) +props_1 <- cbind( + L.Spectra = eigen(Lapg_1)$values, NL.Spectra = eigen(NLapg_1)$values +) +props_2 <- cbind( + L.Spectra = eigen(Lapg_2)$values, NL.Spectra = eigen(NLapg_2)$values +) head(props_1, n = 3) head(props_2, n = 3) -netemd_one_to_one(dhists_1 = props_1, dhists_2 = props_2, smoothing_window_width = 0) # If the network features are considered continuous variables smoothing_window_width equal to zero is recommended. +# If the network features are considered continuous variables +# smoothing_window_width equal to zero is recommended. +netemd_one_to_one( + dhists_1 = props_1, dhists_2 = props_2, smoothing_window_width = 0 +) ``` ------------------------- @@ -207,7 +238,8 @@ where $l$ is the number of connected subgraphs of size $k$, for example, $l=6$ f This variant focuses on detecting more meso-level discrepancies between the ego-network structures. ```{r, netdisGP} -# Netdis using the Geometric-Poisson approximation as a way to obtain background expectations. +# Netdis using the Geometric-Poisson approximation as a way to obtain background +# expectations. 
netdis_one_to_one(graph_1 = graph_1, graph_2 = graph_2, ref_graph = NULL) ``` diff --git a/vignettes/dendrogram_example_net_dis.Rmd b/vignettes/dendrogram_example_net_dis.Rmd index 1090a4ab..265f97b7 100644 --- a/vignettes/dendrogram_example_net_dis.Rmd +++ b/vignettes/dendrogram_example_net_dis.Rmd @@ -87,7 +87,9 @@ netdis_result <- netdis_many_to_many( # Netdis measure for subgraphs of size 3 res3 <- netdis_result$netdis["netdis3", ] -netdis3_mat <- cross_comp_to_matrix(measure = res3, cross_comparison_spec = netdis_result$comp_spec) +netdis3_mat <- cross_comp_to_matrix( + measure = res3, cross_comparison_spec = netdis_result$comp_spec +) print("Netdis: subgraph size = 3") print(netdis3_mat) @@ -134,11 +136,20 @@ plot(phangorn::upgma(as.dist(netdis4_mat), method = "average"),