diff --git a/_bookdown.yml b/_bookdown.yml
index 82222f0..f41cdfe 100644
--- a/_bookdown.yml
+++ b/_bookdown.yml
@@ -27,6 +27,9 @@ rmd_files:
     "ch-workstation.md",
     "ch-tools.md",
     "ch-team.md",
+    "ch-redcap-user.md",
+    "ch-redcap-developer.md",
+    "ch-redcap-admin.md",
 
     # Appendices
     "ch-git.md",
diff --git a/ch-redcap-admin.md b/ch-redcap-admin.md
new file mode 100644
index 0000000..ff81c64
--- /dev/null
+++ b/ch-redcap-admin.md
@@ -0,0 +1,2 @@
+Material for REDCap Admins {#redcap-admin}
+====================================
diff --git a/ch-redcap-developer.md b/ch-redcap-developer.md
new file mode 100644
index 0000000..4f2aaae
--- /dev/null
+++ b/ch-redcap-developer.md
@@ -0,0 +1,3 @@
+Material for REDCap Developers {#redcap-developer}
+====================================
+
diff --git a/ch-redcap-user.md b/ch-redcap-user.md
new file mode 100644
index 0000000..21dbbcc
--- /dev/null
+++ b/ch-redcap-user.md
@@ -0,0 +1,9 @@
+Material for REDCap Users {#redcap-user}
+====================================
+
+Login {#redcap-user-login}
+------------------------------------
+
+Developing Reports {#redcap-user-report-develop}
+------------------------------------
+Please first read [Login](#redcap-user-login).
diff --git a/docs/acknowledgements.html b/docs/acknowledgements.html
index e262353..5cd50ab 100644
--- a/docs/acknowledgements.html
+++ b/docs/acknowledgements.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,7 +555,7 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="acknowledgements" class="section level1" number="27">
+<div id="acknowledgements" class="section level1" number="30">
 <h1><span class="header-section-number">G</span> Acknowledgements</h1>
 <p>The authors thank all our colleagues for the discussions and experiences about data science that lead to this book. At OUHSC, this includes
 <a href="https://github.com/adrose">@adrose</a>,
diff --git a/docs/architecture.html b/docs/architecture.html
index c33821b..6547b42 100644
--- a/docs/architecture.html
+++ b/docs/architecture.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/automation.html b/docs/automation.html
index aa0182d..36b7e38 100644
--- a/docs/automation.html
+++ b/docs/automation.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/coding.html b/docs/coding.html
index 88e738d..7a35a40 100644
--- a/docs/coding.html
+++ b/docs/coding.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/collaboration.html b/docs/collaboration.html
index d681be4..036d719 100644
--- a/docs/collaboration.html
+++ b/docs/collaboration.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/document.html b/docs/document.html
index 205f7e1..b1bd1c5 100644
--- a/docs/document.html
+++ b/docs/document.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/example-chapter.html b/docs/example-chapter.html
index 516faae..aa15347 100644
--- a/docs/example-chapter.html
+++ b/docs/example-chapter.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,13 +555,13 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="example-chapter" class="section level1" number="26">
+<div id="example-chapter" class="section level1" number="29">
 <h1><span class="header-section-number">F</span> Example Chapter</h1>
 <p><em>This intro was copied from the 1st chapter of the example bookdown repo. I’m keeping it temporarily for reference.</em></p>
 <p>You can label chapter and section titles using <code>{#label}</code> after them, e.g., we can reference the <a href="index.html#intro">Intro</a> Chapter. If you do not manually label them, there will be automatic labels anyway</p>
 <p>Figures and tables with captions will be placed in <code>figure</code> and <code>table</code> environments, respectively.</p>
-<div class="sourceCode" id="cb75"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb75-1"><a href="example-chapter.html#cb75-1" aria-hidden="true" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">c</span>(<span class="dv">4</span>, <span class="dv">4</span>, .<span class="dv">1</span>, .<span class="dv">1</span>))</span>
-<span id="cb75-2"><a href="example-chapter.html#cb75-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pressure, <span class="at">type =</span> <span class="st">&#39;b&#39;</span>, <span class="at">pch =</span> <span class="dv">19</span>)</span></code></pre></div>
+<div class="sourceCode" id="cb76"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb76-1"><a href="example-chapter.html#cb76-1" aria-hidden="true" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">c</span>(<span class="dv">4</span>, <span class="dv">4</span>, .<span class="dv">1</span>, .<span class="dv">1</span>))</span>
+<span id="cb76-2"><a href="example-chapter.html#cb76-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pressure, <span class="at">type =</span> <span class="st">&#39;b&#39;</span>, <span class="at">pch =</span> <span class="dv">19</span>)</span></code></pre></div>
 <div class="figure" style="text-align: center"><span id="fig:nice-fig"></span>
 <img src="data-science-practices-1_files/figure-html/nice-fig-1.png" alt="Here is a nice figure!" width="80%" />
 <p class="caption">
@@ -562,10 +569,10 @@ <h1><span class="header-section-number">F</span> Example Chapter</h1>
 </p>
 </div>
 <p>Reference a figure by its code chunk label with the <code>fig:</code> prefix, e.g., see Figure <a href="example-chapter.html#fig:nice-fig">F.1</a>. Similarly, you can reference tables generated from <code>knitr::kable()</code>, e.g., see Table <a href="example-chapter.html#tab:nice-tab">F.1</a>.</p>
-<div class="sourceCode" id="cb76"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb76-1"><a href="example-chapter.html#cb76-1" aria-hidden="true" tabindex="-1"></a>knitr<span class="sc">::</span><span class="fu">kable</span>(</span>
-<span id="cb76-2"><a href="example-chapter.html#cb76-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">head</span>(iris, <span class="dv">20</span>), <span class="at">caption =</span> <span class="st">&#39;Here is a nice table!&#39;</span>,</span>
-<span id="cb76-3"><a href="example-chapter.html#cb76-3" aria-hidden="true" tabindex="-1"></a>  <span class="at">booktabs =</span> <span class="cn">TRUE</span></span>
-<span id="cb76-4"><a href="example-chapter.html#cb76-4" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
+<div class="sourceCode" id="cb77"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb77-1"><a href="example-chapter.html#cb77-1" aria-hidden="true" tabindex="-1"></a>knitr<span class="sc">::</span><span class="fu">kable</span>(</span>
+<span id="cb77-2"><a href="example-chapter.html#cb77-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">head</span>(iris, <span class="dv">20</span>), <span class="at">caption =</span> <span class="st">&#39;Here is a nice table!&#39;</span>,</span>
+<span id="cb77-3"><a href="example-chapter.html#cb77-3" aria-hidden="true" tabindex="-1"></a>  <span class="at">booktabs =</span> <span class="cn">TRUE</span></span>
+<span id="cb77-4"><a href="example-chapter.html#cb77-4" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
 <table>
 <caption><span id="tab:nice-tab">Table F.1: </span>Here is a nice table!</caption>
 <thead>
diff --git a/docs/example-dashboard.html b/docs/example-dashboard.html
index 95fd801..48347c9 100644
--- a/docs/example-dashboard.html
+++ b/docs/example-dashboard.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,7 +555,7 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="example-dashboard" class="section level1" number="25">
+<div id="example-dashboard" class="section level1" number="28">
 <h1><span class="header-section-number">E</span> Example Dashboard</h1>
 <p>Communicating quantitative trends to a community with a <a href="publication.html#publication-phobic">quantitative phobia</a> can be difficult. This appendix showcases a dashboard style that has evolved during the past few years of <a href="https://www.ok.gov/health/Family_Health/Family_Support_and_Prevention_Service/MIECHV_Program_-_Federal_Home_Visiting_Grant/About_the_MIECHV_Program_-_Federal_Home_Visiting_Grant/index.html">OSDH Home Visiting</a>, where twelve local programs practitioners implemented their own intervention ideas tailored to their interests and community.</p>
 <p>Over 50 dashboards have been developed: a custom dashboard is developed for each program’s cycle, and a three additional dashboards communicate the results of program-agnostic investigations. A style guide is an important tool when managing this many unique investigations</p>
@@ -559,17 +566,17 @@ <h1><span class="header-section-number">E</span> Example Dashboard</h1>
 <li>The lessons we’ve learned (and mistakes we’ve made) can be applied to later dashboards. The quality should improve and the development should quicken.</li>
 </ol>
 <p>Just like our CQI grant encourages an HV program to learn from its history and to learn from others, we as analysts should too. As we work with the programs to design a PDSA, each one analyst will learn about the strengths and weaknesses of our current dashboard style, and propose improvements.</p>
-<div id="example-dashboard-example" class="section level2" number="25.1">
+<div id="example-dashboard-example" class="section level2" number="28.1">
 <h2><span class="header-section-number">E.1</span> Example</h2>
 <p>A example dashboard that mimic the real CQI is available at <a href="https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html">https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html</a>. The dashboard source code is available in the <a href="https://github.com/wibeasley/RAnalysisSkeleton/tree/master/analysis/dashboard-1"><code>analysis/dashboard-1</code></a> directory of the <a href="https://github.com/wibeasley/RAnalysisSkeleton#readme">R Analysis Skeleton</a> repository’; this repo contains the code and documents the entire pipeline leading up to this dashboard.</p>
 <p>We’ve had success developing and distributing dashboards as self-contained html files. They are portable and don’t have dependencies on local data files or remote databases, yet the JavaScript and CSS provide a modest amount of interactivity. The dashboard’s principal components are <a href="https://rmarkdown.rstudio.com/flexdashboard/">flexdashboard</a>, <a href="https://plot.ly/r/">plotly</a>, <a href="https://ggplot2.tidyverse.org/">ggplot2</a>, and <a href="https://rmarkdown.rstudio.com/">R Markdown</a>.</p>
 <p>In this dashboard of synthetic data, a cognitive measure is tracked across 14 years in three home visiting counties.</p>
 </div>
-<div id="example-dashboard-guide" class="section level2" number="25.2">
+<div id="example-dashboard-guide" class="section level2" number="28.2">
 <h2><span class="header-section-number">E.2</span> Style Guide</h2>
 <p>This section describes a set of practices that the <a href="https://ouhsc.edu/bbmc/">BBMC analysts</a> have decided are best for the CQI dashboards used in our MIECHV evaluations. In a sense, this CQI dashboard guide supplements our overall <a href="style.html#style">style guide</a>.</p>
 <p>The MIECHV CQI dashboards are based on RStudio’s <a href="https://rmarkdown.rstudio.com/flexdashboard/">flexdashboard</a> package, which uses <a href="https://CRAN.R-project.org/package=rmarkdown">rmarkdown</a>, JavaScript, and CSS. flexdashboard has a <a href="https://rmarkdown.rstudio.com/flexdashboard/">great website</a> that should be read by anyone adapting this guide for their own CQI projects.</p>
-<div id="headline-page" class="section level3" number="25.2.1">
+<div id="headline-page" class="section level3" number="28.2.1">
 <h3><span class="header-section-number">E.2.1</span> Headline page</h3>
 <p><a href="https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html#headline">
 <img src="resources/example-dashboard/headline.png" style="width: 2000px;"/>
@@ -592,7 +599,7 @@ <h3><span class="header-section-number">E.2.1</span> Headline page</h3>
 </ul></li>
 </ol>
 </div>
-<div id="tables-page" class="section level3" number="25.2.2">
+<div id="tables-page" class="section level3" number="28.2.2">
 <h3><span class="header-section-number">E.2.2</span> Tables page</h3>
 <p><a href="https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html#tables">
 <img src="resources/example-dashboard/tables.png" style="width: 2000px;"/>
@@ -612,7 +619,7 @@ <h3><span class="header-section-number">E.2.2</span> Tables page</h3>
 </ol></li>
 </ol>
 </div>
-<div id="graphs-page" class="section level3" number="25.2.3">
+<div id="graphs-page" class="section level3" number="28.2.3">
 <h3><span class="header-section-number">E.2.3</span> Graphs page</h3>
 <p><a href="https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html#graphs">
 <img src="resources/example-dashboard/graphs.png" style="width: 2000px;"/>
@@ -637,7 +644,7 @@ <h3><span class="header-section-number">E.2.3</span> Graphs page</h3>
 <li>use <code>spaghetti_2()</code> located in <a href="https://github.com/OuhscBbmc/miechv-3/blob/master/analysis/common/display-1.R">display-1.R</a>. <strong>(not yet developed.)</strong> Add hover text to each spaghetti.</li>
 </ul>
 </div>
-<div id="marginal-graphs-page" class="section level3" number="25.2.4">
+<div id="marginal-graphs-page" class="section level3" number="28.2.4">
 <h3><span class="header-section-number">E.2.4</span> Marginal Graphs page</h3>
 <p><a href="https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html#marginals">
 <img src="resources/example-dashboard/marginals.png" style="width: 2000px;"/>
@@ -657,7 +664,7 @@ <h3><span class="header-section-number">E.2.4</span> Marginal Graphs page</h3>
 <li><p>histograms have a more specific <em>y</em>-axis. For example, “Count of Months” instead of “Frequency”</p></li>
 </ul>
 </div>
-<div id="documentation-page" class="section level3" number="25.2.5">
+<div id="documentation-page" class="section level3" number="28.2.5">
 <h3><span class="header-section-number">E.2.5</span> Documentation page</h3>
 <p><a href="https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html#documentation">
 <img src="resources/example-dashboard/documentation.png" style="width: 2000px;"/>
@@ -676,7 +683,7 @@ <h3><span class="header-section-number">E.2.5</span> Documentation page</h3>
 </ol></li>
 </ol>
 </div>
-<div id="miscellaneous-notes" class="section level3" number="25.2.6">
+<div id="miscellaneous-notes" class="section level3" number="28.2.6">
 <h3><span class="header-section-number">E.2.6</span> Miscellaneous Notes</h3>
 <ul>
 <li><p>The hierarchy level in this outline indicates the HTML-heading level. Numbers are H1 (<em>i.e.</em>, <code>======</code>) that specify pages, roman numerals are H2 (<em>i.e.</em>, <code>------</code>) that specify columns, and letters are H3 (<em>i.e.</em>, <code>###</code>) that specify tabs.</p></li>
@@ -692,19 +699,19 @@ <h3><span class="header-section-number">E.2.6</span> Miscellaneous Notes</h3>
 </ul>
 </div>
 </div>
-<div id="example-dashboard-architecture" class="section level2" number="25.3">
+<div id="example-dashboard-architecture" class="section level2" number="28.3">
 <h2><span class="header-section-number">E.3</span> Architecture</h2>
 <p>The dashboard is only one piece of a large workflow. The design and construction of this workflow are discussed in this book, which are highlighted below.</p>
 <p><a href="https://github.com/wibeasley/RAnalysisSkeleton#intra-individual-differences">
 <img src="https://github.com/wibeasley/RAnalysisSkeleton/blob/master/documentation/images/flow-skeleton.png?raw=true" style="width: 1000px;"/>.
 </a></p>
-<div id="data-from-external-system" class="section level3" number="25.3.1">
+<div id="data-from-external-system" class="section level3" number="28.3.1">
 <h3><span class="header-section-number">E.3.1</span> Data from External System</h3>
 </div>
-<div id="groomed-data-in-warehouse" class="section level3" number="25.3.2">
+<div id="groomed-data-in-warehouse" class="section level3" number="28.3.2">
 <h3><span class="header-section-number">E.3.2</span> Groomed Data in Warehouse</h3>
 </div>
-<div id="analysis-ready-dataset" class="section level3" number="25.3.3">
+<div id="analysis-ready-dataset" class="section level3" number="28.3.3">
 <h3><span class="header-section-number">E.3.3</span> Analysis-Ready Dataset</h3>
 <ul>
 <li><p>Very little data manipulation should occur in the dashboard. The upstream <a href="patterns.html#pattern-scribe">scribe</a> should produce an analysis-ready rds file. The dashboard should be concerned only with presenting the graphs, tables, summary text, and documentation.</p></li>
diff --git a/docs/file-prototype-r.html b/docs/file-prototype-r.html
index bec2d7f..6306335 100644
--- a/docs/file-prototype-r.html
+++ b/docs/file-prototype-r.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/file-prototype-sql.html b/docs/file-prototype-sql.html
index df08012..25cd058 100644
--- a/docs/file-prototype-sql.html
+++ b/docs/file-prototype-sql.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/git.html b/docs/git.html
index e91a618..b634727 100644
--- a/docs/git.html
+++ b/docs/git.html
@@ -24,14 +24,14 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
   <meta name="apple-mobile-web-app-status-bar-style" content="black" />
   
   
-<link rel="prev" href="team.html"/>
+<link rel="prev" href="redcap-admin.html"/>
 <link rel="next" href="snippets.html"/>
 <script src="libs/header-attrs-2.8/header-attrs.js"></script>
 <script src="libs/jquery-2.2.3/jquery.min.js"></script>
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,9 +555,9 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="git" class="section level1" number="21">
+<div id="git" class="section level1" number="24">
 <h1><span class="header-section-number">A</span> Git &amp; GitHub</h1>
-<div id="git-code" class="section level2" number="21.1">
+<div id="git-code" class="section level2" number="24.1">
 <h2><span class="header-section-number">A.1</span> for Code Development</h2>
 <p><a href="https://github.com/jennybc">Jenny Bryan</a> and <a href="https://github.com/jimhester">Jim Hester</a> have published a thorough description of using Git from a data scientist’s perspective (<a href="https://happygitwithr.com/">Happy Git and GitHub for the useR</a>), and we recommend following their guidance. It is consistent with our approach, with a few exceptions noted below. A complementary resource is <em><a href="https://smile.amazon.com/dp/1449302440">Team Geek</a></em>, which has insightful advice for the human and collaborative aspects of version control.</p>
 <p>Other Resources</p>
@@ -558,7 +565,7 @@ <h2><span class="header-section-number">A.1</span> for Code Development</h2>
 <li><a href="https://blog.travis-ci.com/2019-05-30-setting-up-a-ci-cd-process-on-github">Setting up a CI/CD Process on GitHub with Travis CI</a>. Travis-CI blog from August 2019.</li>
 </ol>
 </div>
-<div id="git-collaboration" class="section level2" number="21.2">
+<div id="git-collaboration" class="section level2" number="24.2">
 <h2><span class="header-section-number">A.2</span> for Collaboration</h2>
 <ol style="list-style-type: decimal">
 <li><p>Somewhat separate from its version control capabilities, GitHub provides built-in tools for coordinating projects across people and time. These tools revolve around <a href="https://guides.github.com/features/issues/">GitHub Issues</a>, which allow teammates to</p></li>
@@ -581,7 +588,7 @@ <h2><span class="header-section-number">A.2</span> for Collaboration</h2>
 </ol>
 <!-- Consider including good examples, like https://github.com/OuhscBbmc/miechv-3/issues/2073.  Probably shorten some for clarity. -->
 </div>
-<div id="git-stability" class="section level2" number="21.3">
+<div id="git-stability" class="section level2" number="24.3">
 <h2><span class="header-section-number">A.3</span> for Stability</h2>
 <ol style="list-style-type: decimal">
 <li><p>Review Git commits closely</p>
@@ -592,14 +599,14 @@ <h2><span class="header-section-number">A.3</span> for Stability</h2>
 </ol></li>
 </ol>
 </div>
-<div id="git-collaborators" class="section level2" number="21.4">
+<div id="git-collaborators" class="section level2" number="24.4">
 <h2><span class="header-section-number">A.4</span> for New Collaborators</h2>
 </div>
-<div id="git-contribution" class="section level2" number="21.5">
+<div id="git-contribution" class="section level2" number="24.5">
 <h2><span class="header-section-number">A.5</span> Steps for Contributing to Repo</h2>
-<div id="git-contribution-regular" class="section level3" number="21.5.1">
+<div id="git-contribution-regular" class="section level3" number="24.5.1">
 <h3><span class="header-section-number">A.5.1</span> Regular Contributions</h3>
-<div id="git-contribution-regular-pull" class="section level4" number="21.5.1.1">
+<div id="git-contribution-regular-pull" class="section level4" number="24.5.1.1">
 <h4><span class="header-section-number">A.5.1.1</span> Keep your dev branch fresh</h4>
 <p>We recommend doing this at least every day you write code in a repo. Perhaps more frequently if a lot of developers are pushing code (<em>e.g.</em>, right before a reporting deadline).</p>
 <ol style="list-style-type: decimal">
@@ -608,7 +615,7 @@ <h4><span class="header-section-number">A.5.1.1</span> Keep your dev branch fres
 <li>Push your local dev branch to the GitHub server</li>
 </ol>
 </div>
-<div id="git-contribution-regular-push" class="section level4" number="21.5.1.2">
+<div id="git-contribution-regular-push" class="section level4" number="24.5.1.2">
 <h4><span class="header-section-number">A.5.1.2</span> Make your code contributions available to other analysts</h4>
 <p>At least every few days, push your changes to the master branch so teammates can benefit from your work, especially if you are improving the pipeline code (<em>e.g.</em>, Ellises or REDCap Arches).</p>
 <ol style="list-style-type: decimal">
@@ -635,7 +642,7 @@ <h4><span class="header-section-number">A.5.1.2</span> Make your code contributi
           </div>
         </div>
       </div>
-<a href="team.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
+<a href="redcap-admin.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
 <a href="snippets.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
     </div>
   </div>
diff --git a/docs/index.html b/docs/index.html
index d52b834..9ea8de4 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -551,7 +558,7 @@ <h1>
 <div id="header">
 <h1 class="title">Collaborative Data Science Practices</h1>
 <p class="author"><em>Will Beasley</em></p>
-<p class="date" style="margin-top: 1.5em;"><em>2021-05-20</em></p>
+<p class="date" style="margin-top: 1.5em;"><em>2021-06-08</em></p>
 </div>
 <div id="intro" class="section level1" number="1">
 <h1><span class="header-section-number">Chapter 1</span> Introduction</h1>
diff --git a/docs/patterns.html b/docs/patterns.html
index c1fc69f..8118a98 100644
--- a/docs/patterns.html
+++ b/docs/patterns.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/presentations.html b/docs/presentations.html
index ef732a8..ee4efe3 100644
--- a/docs/presentations.html
+++ b/docs/presentations.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,17 +555,17 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="presentations" class="section level1" number="23">
+<div id="presentations" class="section level1" number="26">
 <h1><span class="header-section-number">C</span> Presentations</h1>
 <p>Here is a collection of presentations by the BBMC and friends that may help demonstrate concepts discussed in the previous chapters.</p>
-<div id="cdw" class="section level2" number="23.1">
+<div id="cdw" class="section level2" number="26.1">
 <h2><span class="header-section-number">C.1</span> CDW</h2>
 <ol style="list-style-type: decimal">
 <li><strong><a href="https://github.com/OuhscBbmc/prairie-outpost-public">prairie-outpost-public</a></strong>: Documentation and starter files for OUHSC’s Clinical Data Warehouse.</li>
 <li><strong><a href="https://github.com/OuhscBbmc/BbmcResources/blob/master/Publications/presentation-2015-11-18-cdw-bse/clinical-data-warehouse-bse.pdf">OUHSC CDW</a></strong></li>
 </ol>
 </div>
-<div id="redcap" class="section level2" number="23.2">
+<div id="redcap" class="section level2" number="26.2">
 <h2><span class="header-section-number">C.2</span> REDCap</h2>
 <ol style="list-style-type: decimal">
 <li><strong><a href="https://github.com/OuhscBbmc/BbmcResources/blob/master/Publications/Presentation2015-09-REDCapCon/REDCapIntegration.pdf">REDCap Systems Integration</a></strong>. <a href="https://projectredcap.org/about/redcapcon/">REDCap Con</a> 2015, Portland, Oregon.</li>
@@ -567,7 +574,7 @@ <h2><span class="header-section-number">C.2</span> REDCap</h2>
 <li><strong><a href="https://github.com/OuhscBbmc/StatisticalComputing/blob/master/2013_Presentations/03_March/RedcapForUserGroup.pptx">Optimizing Study Management using REDCap, R, and other software tools</a></strong>. <a href="https://github.com/OuhscBbmc/StatisticalComputing">SCUG</a> 2013.</li>
 </ol>
 </div>
-<div id="reproducible-research-visualization" class="section level2" number="23.3">
+<div id="reproducible-research-visualization" class="section level2" number="26.3">
 <h2><span class="header-section-number">C.3</span> Reproducible Research &amp; Visualization</h2>
 <ol style="list-style-type: decimal">
 <li><strong><a href="https://github.com/dss-ialh/displaying-health-data/blob/master/documentation/products/beasley/dhd-2018-uvic-3-a-beasley-2018-11-29.pdf">Building pipelines and dashboards for practitioners</a></strong>: Mobilizing knowledge with reproducible reporting. <a href="https://github.com/dss-ialh/displaying-health-data">Displaying Health Data Colloquium</a> 2018, University of Victoria.</li>
@@ -576,7 +583,7 @@ <h2><span class="header-section-number">C.3</span> Reproducible Research &amp; V
 <li><strong><a href="https://github.com/OuhscBbmc/Wats">WATS: wrap-around time series</a></strong>: Code to accompany WATS Plot article, 2014.</li>
 </ol>
 </div>
-<div id="data-management" class="section level2" number="23.4">
+<div id="data-management" class="section level2" number="26.4">
 <h2><span class="header-section-number">C.4</span> Data Management</h2>
 <ol style="list-style-type: decimal">
 <li><strong><a href="https://rawgit.com/OuhscBbmc/StatisticalComputing/master/2016-presentations/11-november/beasley-scug-validator-2016-11.html#/">BBMC Validator</a></strong>: catch and communicate data errors. <a href="https://github.com/OuhscBbmc/StatisticalComputing">SCUG</a> 2016.</li>
@@ -584,13 +591,13 @@ <h2><span class="header-section-number">C.4</span> Data Management</h2>
 <li><strong><a href="https://rawgit.com/wibeasley/RAnalysisSkeleton/master/documentation/time-and-effort-synthesis.html#/">Time and Effort Data Synthesis</a></strong>. <a href="https://github.com/OuhscBbmc/StatisticalComputing">SCUG</a> 2015.</li>
 </ol>
 </div>
-<div id="github-1" class="section level2" number="23.5">
+<div id="github-1" class="section level2" number="26.5">
 <h2><span class="header-section-number">C.5</span> GitHub</h2>
 <ol style="list-style-type: decimal">
 <li><strong><a href="https://github.com/OuhscBbmc/BbmcResources/blob/master/Publications/Presentation2015-08-GitHub/beasley-github-2015-08.md">Scientific Collaboration with GitHub</a></strong>. <a href="https://github.com/bwawrik/MBIO5810">OU Bioinformatics Breakfast Club</a> 2015.</li>
 </ol>
 </div>
-<div id="software" class="section level2" number="23.6">
+<div id="software" class="section level2" number="26.6">
 <h2><span class="header-section-number">C.6</span> Software</h2>
 <ol style="list-style-type: decimal">
 <li><strong><a href="https://ouhscbbmc.github.io/REDCapR/">REDCapR</a></strong>: Interaction Between R and REDCap.</li>
@@ -599,7 +606,7 @@ <h2><span class="header-section-number">C.6</span> Software</h2>
 <li><strong><a href="https://github.com/OuhscBbmc/usnavy-billets">usnavy billets</a></strong>: Optimally assigning naval officers to billets.</li>
 </ol>
 </div>
-<div id="architectures" class="section level2" number="23.7">
+<div id="architectures" class="section level2" number="26.7">
 <h2><span class="header-section-number">C.7</span> Architectures</h2>
 <ol style="list-style-type: decimal">
 <li><p>Linear Pipeline of the <a href="https://github.com/wibeasley/RAnalysisSkeleton">R Analysis Skeleton</a></p>
@@ -624,7 +631,7 @@ <h2><span class="header-section-number">C.7</span> Architectures</h2>
 </a></p></li>
 </ol>
 </div>
-<div id="components" class="section level2" number="23.8">
+<div id="components" class="section level2" number="26.8">
 <h2><span class="header-section-number">C.8</span> Components</h2>
 <ol style="list-style-type: decimal">
 <li><strong><a href="https://raw.githack.com/OuhscBbmc/StatisticalComputing/master/2018-presentations/10-october/table-styling.html#/">Customizing display tables: using css with DT and kableExtra</a></strong>. <a href="https://github.com/OuhscBbmc/StatisticalComputing">SCUG</a> 2018.</li>
diff --git a/docs/publication.html b/docs/publication.html
index 826c2fb..7139510 100644
--- a/docs/publication.html
+++ b/docs/publication.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/redcap-admin.html b/docs/redcap-admin.html
new file mode 100644
index 0000000..635bb1c
--- /dev/null
+++ b/docs/redcap-admin.html
@@ -0,0 +1,624 @@
+<!DOCTYPE html>
+<html lang="" xml:lang="">
+<head>
+
+  <meta charset="utf-8" />
+  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+  <title>Chapter 23 Material for REDCap Admins | Collaborative Data Science Practices</title>
+  <meta name="description" content="Collection of publicly available practices of data science and analysis." />
+  <meta name="generator" content="bookdown 0.22 and GitBook 2.6.7" />
+
+  <meta property="og:title" content="Chapter 23 Material for REDCap Admins | Collaborative Data Science Practices" />
+  <meta property="og:type" content="book" />
+  
+  
+  <meta property="og:description" content="Collection of publicly available practices of data science and analysis." />
+  
+
+  <meta name="twitter:card" content="summary" />
+  <meta name="twitter:title" content="Chapter 23 Material for REDCap Admins | Collaborative Data Science Practices" />
+  
+  <meta name="twitter:description" content="Collection of publicly available practices of data science and analysis." />
+  
+
+<meta name="author" content="Will Beasley" />
+
+
+<meta name="date" content="2021-06-08" />
+
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <meta name="apple-mobile-web-app-capable" content="yes" />
+  <meta name="apple-mobile-web-app-status-bar-style" content="black" />
+  
+  
+<link rel="prev" href="redcap-developer.html"/>
+<link rel="next" href="git.html"/>
+<script src="libs/header-attrs-2.8/header-attrs.js"></script>
+<script src="libs/jquery-2.2.3/jquery.min.js"></script>
+<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
+
+
+
+
+
+
+
+
+
+<link href="libs/anchor-sections-1.0.1/anchor-sections.css" rel="stylesheet" />
+<script src="libs/anchor-sections-1.0.1/anchor-sections.js"></script>
+
+
+<style type="text/css">
+pre > code.sourceCode { white-space: pre; position: relative; }
+pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
+pre > code.sourceCode > span:empty { height: 1.2em; }
+.sourceCode { overflow: visible; }
+code.sourceCode > span { color: inherit; text-decoration: inherit; }
+pre.sourceCode { margin: 0; }
+@media screen {
+div.sourceCode { overflow: auto; }
+}
+@media print {
+pre > code.sourceCode { white-space: pre-wrap; }
+pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+}
+pre.numberSource code
+  { counter-reset: source-line 0; }
+pre.numberSource code > span
+  { position: relative; left: -4em; counter-increment: source-line; }
+pre.numberSource code > span > a:first-child::before
+  { content: counter(source-line);
+    position: relative; left: -1em; text-align: right; vertical-align: baseline;
+    border: none; display: inline-block;
+    -webkit-touch-callout: none; -webkit-user-select: none;
+    -khtml-user-select: none; -moz-user-select: none;
+    -ms-user-select: none; user-select: none;
+    padding: 0 4px; width: 4em;
+    color: #aaaaaa;
+  }
+pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
+div.sourceCode
+  {   }
+@media screen {
+pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+}
+code span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code span.at { color: #7d9029; } /* Attribute */
+code span.bn { color: #40a070; } /* BaseN */
+code span.bu { } /* BuiltIn */
+code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code span.ch { color: #4070a0; } /* Char */
+code span.cn { color: #880000; } /* Constant */
+code span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code span.dt { color: #902000; } /* DataType */
+code span.dv { color: #40a070; } /* DecVal */
+code span.er { color: #ff0000; font-weight: bold; } /* Error */
+code span.ex { } /* Extension */
+code span.fl { color: #40a070; } /* Float */
+code span.fu { color: #06287e; } /* Function */
+code span.im { } /* Import */
+code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+code span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code span.op { color: #666666; } /* Operator */
+code span.ot { color: #007020; } /* Other */
+code span.pp { color: #bc7a00; } /* Preprocessor */
+code span.sc { color: #4070a0; } /* SpecialChar */
+code span.ss { color: #bb6688; } /* SpecialString */
+code span.st { color: #4070a0; } /* String */
+code span.va { color: #19177c; } /* Variable */
+code span.vs { color: #4070a0; } /* VerbatimString */
+code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+</style>
+
+<style type="text/css">
+/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
+div.csl-bib-body { }
+div.csl-entry {
+  clear: both;
+}
+.hanging div.csl-entry {
+  margin-left:2em;
+  text-indent:-2em;
+}
+div.csl-left-margin {
+  min-width:2em;
+  float:left;
+}
+div.csl-right-inline {
+  margin-left:2em;
+  padding-left:1em;
+}
+div.csl-indent {
+  margin-left: 2em;
+}
+</style>
+
+<link rel="stylesheet" href="style.css" type="text/css" />
+</head>
+
+<body>
+
+
+
+  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
+
+    <div class="book-summary">
+      <nav role="navigation">
+
+<ul class="summary">
+<li><a href="./">Collaborative Data Science</a></li>
+
+<li class="divider"></li>
+<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Introduction</a></li>
+<li class="chapter" data-level="2" data-path="coding.html"><a href="coding.html"><i class="fa fa-check"></i><b>2</b> Coding Principles</a>
+<ul>
+<li class="chapter" data-level="2.1" data-path="coding.html"><a href="coding.html#coding-simplify"><i class="fa fa-check"></i><b>2.1</b> Simplify</a>
+<ul>
+<li class="chapter" data-level="2.1.1" data-path="coding.html"><a href="coding.html#coding-simplify-types"><i class="fa fa-check"></i><b>2.1.1</b> Data Types</a></li>
+<li class="chapter" data-level="2.1.2" data-path="coding.html"><a href="coding.html#coding-simplify-categorical"><i class="fa fa-check"></i><b>2.1.2</b> Categorical Levels</a></li>
+<li class="chapter" data-level="2.1.3" data-path="coding.html"><a href="coding.html#coding-simplify-recoding"><i class="fa fa-check"></i><b>2.1.3</b> Recoding</a></li>
+</ul></li>
+<li class="chapter" data-level="2.2" data-path="coding.html"><a href="coding.html#coding-defensive"><i class="fa fa-check"></i><b>2.2</b> Defensive Style</a>
+<ul>
+<li class="chapter" data-level="2.2.1" data-path="coding.html"><a href="coding.html#coding-defensive-qualify-functions"><i class="fa fa-check"></i><b>2.2.1</b> Qualify functions</a></li>
+<li class="chapter" data-level="2.2.2" data-path="coding.html"><a href="coding.html#coding-defensive-date-arithmetic"><i class="fa fa-check"></i><b>2.2.2</b> Date Arithmetic</a></li>
+<li class="chapter" data-level="2.2.3" data-path="coding.html"><a href="coding.html#excluding-bad-cases"><i class="fa fa-check"></i><b>2.2.3</b> Excluding Bad Cases</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="3" data-path="architecture.html"><a href="architecture.html"><i class="fa fa-check"></i><b>3</b> Architecture Principles</a>
+<ul>
+<li class="chapter" data-level="3.1" data-path="architecture.html"><a href="architecture.html#encapsulation"><i class="fa fa-check"></i><b>3.1</b> Encapsulation</a></li>
+<li class="chapter" data-level="3.2" data-path="architecture.html"><a href="architecture.html#leverage-team-members-strengths-avoid-weaknesses"><i class="fa fa-check"></i><b>3.2</b> Leverage team members’ strengths &amp; avoid weaknesses</a>
+<ul>
+<li class="chapter" data-level="3.2.1" data-path="architecture.html"><a href="architecture.html#focused-code-files"><i class="fa fa-check"></i><b>3.2.1</b> Focused code files</a></li>
+<li class="chapter" data-level="3.2.2" data-path="architecture.html"><a href="architecture.html#metadata-for-content-experts"><i class="fa fa-check"></i><b>3.2.2</b> Metadata for content experts</a></li>
+</ul></li>
+<li class="chapter" data-level="3.3" data-path="architecture.html"><a href="architecture.html#scales"><i class="fa fa-check"></i><b>3.3</b> Scales</a>
+<ul>
+<li class="chapter" data-level="3.3.1" data-path="architecture.html"><a href="architecture.html#single-source-single-analysis"><i class="fa fa-check"></i><b>3.3.1</b> Single source &amp; single analysis</a></li>
+<li class="chapter" data-level="3.3.2" data-path="architecture.html"><a href="architecture.html#multiple-sources-multiple-analyses"><i class="fa fa-check"></i><b>3.3.2</b> Multiple sources &amp; multiple analyses</a></li>
+</ul></li>
+<li class="chapter" data-level="3.4" data-path="architecture.html"><a href="architecture.html#architecture-consistency"><i class="fa fa-check"></i><b>3.4</b> Consistency</a>
+<ul>
+<li class="chapter" data-level="3.4.1" data-path="architecture.html"><a href="architecture.html#consistency-files"><i class="fa fa-check"></i><b>3.4.1</b> Across Files</a></li>
+<li class="chapter" data-level="3.4.2" data-path="architecture.html"><a href="architecture.html#across-languages"><i class="fa fa-check"></i><b>3.4.2</b> Across Languages</a></li>
+<li class="chapter" data-level="3.4.3" data-path="architecture.html"><a href="architecture.html#across-projects"><i class="fa fa-check"></i><b>3.4.3</b> Across Projects</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="4" data-path="file-prototype-r.html"><a href="file-prototype-r.html"><i class="fa fa-check"></i><b>4</b> Prototypical R File</a>
+<ul>
+<li class="chapter" data-level="4.1" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-clear"><i class="fa fa-check"></i><b>4.1</b> Clear Memory</a></li>
+<li class="chapter" data-level="4.2" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-sources"><i class="fa fa-check"></i><b>4.2</b> Load Sources</a></li>
+<li class="chapter" data-level="4.3" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-packages"><i class="fa fa-check"></i><b>4.3</b> Load Packages</a></li>
+<li class="chapter" data-level="4.4" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-declare"><i class="fa fa-check"></i><b>4.4</b> Declare Globals</a></li>
+<li class="chapter" data-level="4.5" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-data"><i class="fa fa-check"></i><b>4.5</b> Load Data</a></li>
+<li class="chapter" data-level="4.6" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-tweak-data"><i class="fa fa-check"></i><b>4.6</b> Tweak Data</a></li>
+<li class="chapter" data-level="4.7" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-unique"><i class="fa fa-check"></i><b>4.7</b> (Unique Content)</a></li>
+<li class="chapter" data-level="4.8" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-verify-values"><i class="fa fa-check"></i><b>4.8</b> Verify Values</a></li>
+<li class="chapter" data-level="4.9" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-specify-columns"><i class="fa fa-check"></i><b>4.9</b> Specify Output Columns</a></li>
+<li class="chapter" data-level="4.10" data-path="file-prototype-r.html"><a href="file-prototype-r.html#save-to-disk-or-database"><i class="fa fa-check"></i><b>4.10</b> Save to Disk or Database</a></li>
+<li class="chapter" data-level="4.11" data-path="file-prototype-r.html"><a href="file-prototype-r.html#additional-resources"><i class="fa fa-check"></i><b>4.11</b> Additional Resources</a></li>
+</ul></li>
+<li class="chapter" data-level="5" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html"><i class="fa fa-check"></i><b>5</b> Prototypical SQL File</a>
+<ul>
+<li class="chapter" data-level="5.1" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-choice"><i class="fa fa-check"></i><b>5.1</b> Choice of Database Engine</a></li>
+<li class="chapter" data-level="5.2" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-ferry"><i class="fa fa-check"></i><b>5.2</b> Ferry</a></li>
+<li class="chapter" data-level="5.3" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-default-database"><i class="fa fa-check"></i><b>5.3</b> Default Databases</a></li>
+<li class="chapter" data-level="5.4" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-declare"><i class="fa fa-check"></i><b>5.4</b> Declare Values Databases</a></li>
+<li class="chapter" data-level="5.5" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-recreate"><i class="fa fa-check"></i><b>5.5</b> Recreate Table</a></li>
+<li class="chapter" data-level="5.6" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-truncate"><i class="fa fa-check"></i><b>5.6</b> Truncate Table</a></li>
+<li class="chapter" data-level="5.7" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-insert"><i class="fa fa-check"></i><b>5.7</b> INSERT INTO</a></li>
+<li class="chapter" data-level="5.8" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-select"><i class="fa fa-check"></i><b>5.8</b> SELECT</a></li>
+<li class="chapter" data-level="5.9" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-from"><i class="fa fa-check"></i><b>5.9</b> FROM</a></li>
+<li class="chapter" data-level="5.10" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-where"><i class="fa fa-check"></i><b>5.10</b> WHERE</a></li>
+<li class="chapter" data-level="5.11" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-order-by"><i class="fa fa-check"></i><b>5.11</b> ORDER BY</a></li>
+<li class="chapter" data-level="5.12" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-indexing"><i class="fa fa-check"></i><b>5.12</b> Indexing</a></li>
+</ul></li>
+<li class="chapter" data-level="6" data-path="repo-prototype.html"><a href="repo-prototype.html"><i class="fa fa-check"></i><b>6</b> Prototypical Repository</a>
+<ul>
+<li class="chapter" data-level="6.1" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-root"><i class="fa fa-check"></i><b>6.1</b> Root</a>
+<ul>
+<li class="chapter" data-level="6.1.1" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-config"><i class="fa fa-check"></i><b>6.1.1</b> <code>config.R</code></a></li>
+<li class="chapter" data-level="6.1.2" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-flow"><i class="fa fa-check"></i><b>6.1.2</b> <code>flow.R</code></a></li>
+<li class="chapter" data-level="6.1.3" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-readme"><i class="fa fa-check"></i><b>6.1.3</b> <code>README.md</code></a></li>
+<li class="chapter" data-level="6.1.4" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-rproj"><i class="fa fa-check"></i><b>6.1.4</b> <code>*.Rproj</code></a></li>
+</ul></li>
+<li class="chapter" data-level="6.2" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-analysis"><i class="fa fa-check"></i><b>6.2</b> Analysis</a></li>
+<li class="chapter" data-level="6.3" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-data-public"><i class="fa fa-check"></i><b>6.3</b> Data Public</a></li>
+<li class="chapter" data-level="6.4" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-data-unshared"><i class="fa fa-check"></i><b>6.4</b> Data Unshared</a></li>
+<li class="chapter" data-level="6.5" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-documentation"><i class="fa fa-check"></i><b>6.5</b> Documentation</a></li>
+<li class="chapter" data-level="6.6" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-manipulation"><i class="fa fa-check"></i><b>6.6</b> Manipulation</a></li>
+<li class="chapter" data-level="6.7" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-stitched"><i class="fa fa-check"></i><b>6.7</b> Stitched Output</a></li>
+<li class="chapter" data-level="6.8" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-utility"><i class="fa fa-check"></i><b>6.8</b> Utility</a></li>
+</ul></li>
+<li class="chapter" data-level="7" data-path="rest.html"><a href="rest.html"><i class="fa fa-check"></i><b>7</b> Data at Rest</a>
+<ul>
+<li class="chapter" data-level="7.1" data-path="rest.html"><a href="rest.html#data-states"><i class="fa fa-check"></i><b>7.1</b> Data States</a></li>
+<li class="chapter" data-level="7.2" data-path="rest.html"><a href="rest.html#data-containers"><i class="fa fa-check"></i><b>7.2</b> Data Containers</a>
+<ul>
+<li class="chapter" data-level="7.2.1" data-path="rest.html"><a href="rest.html#data-containers-csv"><i class="fa fa-check"></i><b>7.2.1</b> csv</a></li>
+<li class="chapter" data-level="7.2.2" data-path="rest.html"><a href="rest.html#data-containers-rds"><i class="fa fa-check"></i><b>7.2.2</b> rds</a></li>
+<li class="chapter" data-level="7.2.3" data-path="rest.html"><a href="rest.html#data-containers-yaml"><i class="fa fa-check"></i><b>7.2.3</b> yaml, json, and xml</a></li>
+<li class="chapter" data-level="7.2.4" data-path="rest.html"><a href="rest.html#data-containers-arrow"><i class="fa fa-check"></i><b>7.2.4</b> Arrow</a></li>
+<li class="chapter" data-level="7.2.5" data-path="rest.html"><a href="rest.html#data-containers-sqlite"><i class="fa fa-check"></i><b>7.2.5</b> SQLite</a></li>
+<li class="chapter" data-level="7.2.6" data-path="rest.html"><a href="rest.html#data-containers-database"><i class="fa fa-check"></i><b>7.2.6</b> Central Enterprise database</a></li>
+<li class="chapter" data-level="7.2.7" data-path="rest.html"><a href="rest.html#data-containers-redcap"><i class="fa fa-check"></i><b>7.2.7</b> Central REDCap database</a></li>
+<li class="chapter" data-level="7.2.8" data-path="rest.html"><a href="rest.html#data-containers-avoid"><i class="fa fa-check"></i><b>7.2.8</b> Containers to avoid</a></li>
+</ul></li>
+<li class="chapter" data-level="7.3" data-path="rest.html"><a href="rest.html#data-conventions"><i class="fa fa-check"></i><b>7.3</b> Storage Conventions</a>
+<ul>
+<li class="chapter" data-level="7.3.1" data-path="rest.html"><a href="rest.html#data-conventions-all"><i class="fa fa-check"></i><b>7.3.1</b> All Sources</a></li>
+<li class="chapter" data-level="7.3.2" data-path="rest.html"><a href="rest.html#data-conventions-text"><i class="fa fa-check"></i><b>7.3.2</b> Text</a></li>
+<li class="chapter" data-level="7.3.3" data-path="rest.html"><a href="rest.html#data-conventions-excel"><i class="fa fa-check"></i><b>7.3.3</b> Excel</a></li>
+<li class="chapter" data-level="7.3.4" data-path="rest.html"><a href="rest.html#data-conventions-meditech"><i class="fa fa-check"></i><b>7.3.4</b> Meditech</a></li>
+<li class="chapter" data-level="7.3.5" data-path="rest.html"><a href="rest.html#data-conventions-database"><i class="fa fa-check"></i><b>7.3.5</b> Databases</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="8" data-path="patterns.html"><a href="patterns.html"><i class="fa fa-check"></i><b>8</b> Patterns</a>
+<ul>
+<li class="chapter" data-level="8.1" data-path="patterns.html"><a href="patterns.html#pattern-ellis"><i class="fa fa-check"></i><b>8.1</b> Ellis</a>
+<ul>
+<li class="chapter" data-level="8.1.1" data-path="patterns.html"><a href="patterns.html#purpose"><i class="fa fa-check"></i><b>8.1.1</b> Purpose</a></li>
+<li class="chapter" data-level="8.1.2" data-path="patterns.html"><a href="patterns.html#philosophy"><i class="fa fa-check"></i><b>8.1.2</b> Philosophy</a></li>
+<li class="chapter" data-level="8.1.3" data-path="patterns.html"><a href="patterns.html#guidelines"><i class="fa fa-check"></i><b>8.1.3</b> Guidelines</a></li>
+<li class="chapter" data-level="8.1.4" data-path="patterns.html"><a href="patterns.html#examples"><i class="fa fa-check"></i><b>8.1.4</b> Examples</a></li>
+<li class="chapter" data-level="8.1.5" data-path="patterns.html"><a href="patterns.html#elements"><i class="fa fa-check"></i><b>8.1.5</b> Elements</a></li>
+</ul></li>
+<li class="chapter" data-level="8.2" data-path="patterns.html"><a href="patterns.html#pattern-arch"><i class="fa fa-check"></i><b>8.2</b> Arch</a></li>
+<li class="chapter" data-level="8.3" data-path="patterns.html"><a href="patterns.html#pattern-ferry"><i class="fa fa-check"></i><b>8.3</b> Ferry</a></li>
+<li class="chapter" data-level="8.4" data-path="patterns.html"><a href="patterns.html#pattern-scribe"><i class="fa fa-check"></i><b>8.4</b> Scribe</a></li>
+<li class="chapter" data-level="8.5" data-path="patterns.html"><a href="patterns.html#pattern-analysis"><i class="fa fa-check"></i><b>8.5</b> Analysis</a></li>
+<li class="chapter" data-level="8.6" data-path="patterns.html"><a href="patterns.html#pattern-presentation-static"><i class="fa fa-check"></i><b>8.6</b> Presentation - Static</a></li>
+<li class="chapter" data-level="8.7" data-path="patterns.html"><a href="patterns.html#pattern-presentation-interactive"><i class="fa fa-check"></i><b>8.7</b> Presentation - Interactive</a></li>
+<li class="chapter" data-level="8.8" data-path="patterns.html"><a href="patterns.html#pattern-metadata"><i class="fa fa-check"></i><b>8.8</b> Metadata</a>
+<ul>
+<li class="chapter" data-level="8.8.1" data-path="patterns.html"><a href="patterns.html#primary-rules-for-mapping"><i class="fa fa-check"></i><b>8.8.1</b> Primary Rules for Mapping</a></li>
+<li class="chapter" data-level="8.8.2" data-path="patterns.html"><a href="patterns.html#secondary-rules-for-mapping"><i class="fa fa-check"></i><b>8.8.2</b> Secondary Rules for Mapping</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="9" data-path="security.html"><a href="security.html"><i class="fa fa-check"></i><b>9</b> Security &amp; Private Data</a>
+<ul>
+<li class="chapter" data-level="9.1" data-path="security.html"><a href="security.html#security-guidelines"><i class="fa fa-check"></i><b>9.1</b> Security Guidelines</a></li>
+<li class="chapter" data-level="9.2" data-path="security.html"><a href="security.html#dataset-level-redaction"><i class="fa fa-check"></i><b>9.2</b> Dataset-level Redaction</a></li>
+<li class="chapter" data-level="9.3" data-path="security.html"><a href="security.html#security-for-data-at-rest"><i class="fa fa-check"></i><b>9.3</b> Security for Data at Rest</a></li>
+<li class="chapter" data-level="9.4" data-path="security.html"><a href="security.html#file-level-permissions"><i class="fa fa-check"></i><b>9.4</b> File-level permissions</a></li>
+<li class="chapter" data-level="9.5" data-path="security.html"><a href="security.html#database-permissions"><i class="fa fa-check"></i><b>9.5</b> Database permissions</a></li>
+<li class="chapter" data-level="9.6" data-path="security.html"><a href="security.html#public-private-repositories"><i class="fa fa-check"></i><b>9.6</b> Public &amp; Private Repositories</a>
+<ul>
+<li class="chapter" data-level="9.6.1" data-path="security.html"><a href="security.html#repo-rules"><i class="fa fa-check"></i><b>9.6.1</b> Repo Rules</a></li>
+<li class="chapter" data-level="9.6.2" data-path="security.html"><a href="security.html#scrubbing-github-history"><i class="fa fa-check"></i><b>9.6.2</b> Scrubbing GitHub history</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="10" data-path="automation.html"><a href="automation.html"><i class="fa fa-check"></i><b>10</b> Automation &amp; Reproducibility</a>
+<ul>
+<li class="chapter" data-level="10.1" data-path="automation.html"><a href="automation.html#automation-mediator"><i class="fa fa-check"></i><b>10.1</b> Mediator</a>
+<ul>
+<li class="chapter" data-level="10.1.1" data-path="automation.html"><a href="automation.html#automation-flow"><i class="fa fa-check"></i><b>10.1.1</b> Flow File in R</a></li>
+<li class="chapter" data-level="10.1.2" data-path="automation.html"><a href="automation.html#automation-makefile"><i class="fa fa-check"></i><b>10.1.2</b> Makefile</a></li>
+<li class="chapter" data-level="10.1.3" data-path="automation.html"><a href="automation.html#automation-ssis"><i class="fa fa-check"></i><b>10.1.3</b> SSIS</a></li>
+</ul></li>
+<li class="chapter" data-level="10.2" data-path="automation.html"><a href="automation.html#automation-scheduling"><i class="fa fa-check"></i><b>10.2</b> Scheduling</a>
+<ul>
+<li class="chapter" data-level="10.2.1" data-path="automation.html"><a href="automation.html#automation-cron"><i class="fa fa-check"></i><b>10.2.1</b> cron</a></li>
+<li class="chapter" data-level="10.2.2" data-path="automation.html"><a href="automation.html#automation-task-scheduler"><i class="fa fa-check"></i><b>10.2.2</b> Task Scheduler</a></li>
+<li class="chapter" data-level="10.2.3" data-path="automation.html"><a href="automation.html#automation-sql-server-agent"><i class="fa fa-check"></i><b>10.2.3</b> SQL Server Agent</a></li>
+</ul></li>
+<li class="chapter" data-level="10.3" data-path="automation.html"><a href="automation.html#auxiliary-issues"><i class="fa fa-check"></i><b>10.3</b> Auxiliary Issues</a>
+<ul>
+<li class="chapter" data-level="10.3.1" data-path="automation.html"><a href="automation.html#sink-log-files"><i class="fa fa-check"></i><b>10.3.1</b> Sink Log Files</a></li>
+<li class="chapter" data-level="10.3.2" data-path="automation.html"><a href="automation.html#package-versions"><i class="fa fa-check"></i><b>10.3.2</b> Package Versions</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="11" data-path="scaling-up.html"><a href="scaling-up.html"><i class="fa fa-check"></i><b>11</b> Scaling Up</a>
+<ul>
+<li class="chapter" data-level="11.1" data-path="scaling-up.html"><a href="scaling-up.html#data-storage"><i class="fa fa-check"></i><b>11.1</b> Data Storage</a></li>
+<li class="chapter" data-level="11.2" data-path="scaling-up.html"><a href="scaling-up.html#data-processing"><i class="fa fa-check"></i><b>11.2</b> Data Processing</a></li>
+</ul></li>
+<li class="chapter" data-level="12" data-path="collaboration.html"><a href="collaboration.html"><i class="fa fa-check"></i><b>12</b> Parallel Collaboration</a>
+<ul>
+<li class="chapter" data-level="12.1" data-path="collaboration.html"><a href="collaboration.html#social-contract"><i class="fa fa-check"></i><b>12.1</b> Social Contract</a></li>
+<li class="chapter" data-level="12.2" data-path="collaboration.html"><a href="collaboration.html#code-reviews"><i class="fa fa-check"></i><b>12.2</b> Code Reviews</a></li>
+<li class="chapter" data-level="12.3" data-path="collaboration.html"><a href="collaboration.html#remote"><i class="fa fa-check"></i><b>12.3</b> Remote</a></li>
+<li class="chapter" data-level="12.4" data-path="collaboration.html"><a href="collaboration.html#additional-resources-1"><i class="fa fa-check"></i><b>12.4</b> Additional Resources</a></li>
+<li class="chapter" data-level="12.5" data-path="collaboration.html"><a href="collaboration.html#loose-notes"><i class="fa fa-check"></i><b>12.5</b> Loose Notes</a>
+<ul>
+<li class="chapter" data-level="12.5.1" data-path="collaboration.html"><a href="collaboration.html#github"><i class="fa fa-check"></i><b>12.5.1</b> GitHub</a></li>
+<li class="chapter" data-level="12.5.2" data-path="collaboration.html"><a href="collaboration.html#common-code"><i class="fa fa-check"></i><b>12.5.2</b> Common Code</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="13" data-path="document.html"><a href="document.html"><i class="fa fa-check"></i><b>13</b> Documentation</a>
+<ul>
+<li class="chapter" data-level="13.1" data-path="document.html"><a href="document.html#team-wide"><i class="fa fa-check"></i><b>13.1</b> Team-wide</a></li>
+<li class="chapter" data-level="13.2" data-path="document.html"><a href="document.html#project-specific"><i class="fa fa-check"></i><b>13.2</b> Project-specific</a></li>
+<li class="chapter" data-level="13.3" data-path="document.html"><a href="document.html#dataset-origin-structure"><i class="fa fa-check"></i><b>13.3</b> Dataset Origin &amp; Structure</a></li>
+<li class="chapter" data-level="13.4" data-path="document.html"><a href="document.html#issues-tasks"><i class="fa fa-check"></i><b>13.4</b> Issues &amp; Tasks</a>
+<ul>
+<li class="chapter" data-level="13.4.1" data-path="document.html"><a href="document.html#documentation-issue-template"><i class="fa fa-check"></i><b>13.4.1</b> GitHub Issue Template</a></li>
+</ul></li>
+<li class="chapter" data-level="13.5" data-path="document.html"><a href="document.html#flow-diagrams"><i class="fa fa-check"></i><b>13.5</b> Flow Diagrams</a></li>
+<li class="chapter" data-level="13.6" data-path="document.html"><a href="document.html#setting-up-new-machine"><i class="fa fa-check"></i><b>13.6</b> Setting up new machine</a></li>
+</ul></li>
+<li class="chapter" data-level="14" data-path="style.html"><a href="style.html"><i class="fa fa-check"></i><b>14</b> Style Guide</a>
+<ul>
+<li class="chapter" data-level="14.1" data-path="style.html"><a href="style.html#readability"><i class="fa fa-check"></i><b>14.1</b> Readability</a>
+<ul>
+<li class="chapter" data-level="14.1.1" data-path="style.html"><a href="style.html#style-number"><i class="fa fa-check"></i><b>14.1.1</b> Number</a></li>
+<li class="chapter" data-level="14.1.2" data-path="style.html"><a href="style.html#style-abbreviation"><i class="fa fa-check"></i><b>14.1.2</b> Abbreviations</a></li>
+</ul></li>
+<li class="chapter" data-level="14.2" data-path="style.html"><a href="style.html#datasets"><i class="fa fa-check"></i><b>14.2</b> Datasets</a>
+<ul>
+<li class="chapter" data-level="14.2.1" data-path="style.html"><a href="style.html#style-filter"><i class="fa fa-check"></i><b>14.2.1</b> Filtering Rows</a></li>
+<li class="chapter" data-level="14.2.2" data-path="style.html"><a href="style.html#style-attach"><i class="fa fa-check"></i><b>14.2.2</b> Don’t attach</a></li>
+</ul></li>
+<li class="chapter" data-level="14.3" data-path="style.html"><a href="style.html#style-factor"><i class="fa fa-check"></i><b>14.3</b> Categorical Variables</a>
+<ul>
+<li class="chapter" data-level="14.3.1" data-path="style.html"><a href="style.html#style-factor-unknown"><i class="fa fa-check"></i><b>14.3.1</b> Explicit Missing Values</a></li>
+<li class="chapter" data-level="14.3.2" data-path="style.html"><a href="style.html#style-factor-granularity"><i class="fa fa-check"></i><b>14.3.2</b> Granularity</a></li>
+</ul></li>
+<li class="chapter" data-level="14.4" data-path="style.html"><a href="style.html#style-dates"><i class="fa fa-check"></i><b>14.4</b> Dates</a></li>
+<li class="chapter" data-level="14.5" data-path="style.html"><a href="style.html#naming"><i class="fa fa-check"></i><b>14.5</b> Naming</a>
+<ul>
+<li class="chapter" data-level="14.5.1" data-path="style.html"><a href="style.html#style-naming-variables"><i class="fa fa-check"></i><b>14.5.1</b> Variables</a></li>
+<li class="chapter" data-level="14.5.2" data-path="style.html"><a href="style.html#style-naming-files"><i class="fa fa-check"></i><b>14.5.2</b> Files and Folders</a></li>
+<li class="chapter" data-level="14.5.3" data-path="style.html"><a href="style.html#style-naming-datasets"><i class="fa fa-check"></i><b>14.5.3</b> Datasets</a></li>
+<li class="chapter" data-level="14.5.4" data-path="style.html"><a href="style.html#style-naming-semantic"><i class="fa fa-check"></i><b>14.5.4</b> Semantic sorting</a></li>
+</ul></li>
+<li class="chapter" data-level="14.6" data-path="style.html"><a href="style.html#style-whitespace"><i class="fa fa-check"></i><b>14.6</b> Whitespace</a></li>
+<li class="chapter" data-level="14.7" data-path="style.html"><a href="style.html#style-database"><i class="fa fa-check"></i><b>14.7</b> Database</a></li>
+<li class="chapter" data-level="14.8" data-path="style.html"><a href="style.html#style-ggplot"><i class="fa fa-check"></i><b>14.8</b> ggplot2</a>
+<ul>
+<li class="chapter" data-level="14.8.1" data-path="style.html"><a href="style.html#style-ggplot-order"><i class="fa fa-check"></i><b>14.8.1</b> Order of commands</a></li>
+<li class="chapter" data-level="14.8.2" data-path="style.html"><a href="style.html#style-ggplot-gotchas"><i class="fa fa-check"></i><b>14.8.2</b> Gotchas</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="15" data-path="publication.html"><a href="publication.html"><i class="fa fa-check"></i><b>15</b> Publishing Results</a>
+<ul>
+<li class="chapter" data-level="15.1" data-path="publication.html"><a href="publication.html#publication-analysts"><i class="fa fa-check"></i><b>15.1</b> To Other Analysts</a></li>
+<li class="chapter" data-level="15.2" data-path="publication.html"><a href="publication.html#publication-experts"><i class="fa fa-check"></i><b>15.2</b> To Researchers &amp; Content Experts</a></li>
+<li class="chapter" data-level="15.3" data-path="publication.html"><a href="publication.html#publication-phobic"><i class="fa fa-check"></i><b>15.3</b> To Technical-Phobic Audiences</a></li>
+</ul></li>
+<li class="chapter" data-level="16" data-path="testing-and-validation.html"><a href="testing-and-validation.html"><i class="fa fa-check"></i><b>16</b> Testing, Validation, &amp; Defensive Programming</a>
+<ul>
+<li class="chapter" data-level="16.1" data-path="testing-and-validation.html"><a href="testing-and-validation.html#testing-functions"><i class="fa fa-check"></i><b>16.1</b> Testing Functions</a></li>
+<li class="chapter" data-level="16.2" data-path="testing-and-validation.html"><a href="testing-and-validation.html#defensive-programming"><i class="fa fa-check"></i><b>16.2</b> Defensive Programming</a></li>
+<li class="chapter" data-level="16.3" data-path="testing-and-validation.html"><a href="testing-and-validation.html#validator"><i class="fa fa-check"></i><b>16.3</b> Validator</a></li>
+</ul></li>
+<li class="chapter" data-level="17" data-path="troubleshooting.html"><a href="troubleshooting.html"><i class="fa fa-check"></i><b>17</b> Troubleshooting and Debugging</a>
+<ul>
+<li class="chapter" data-level="17.1" data-path="troubleshooting.html"><a href="troubleshooting.html#finding-help"><i class="fa fa-check"></i><b>17.1</b> Finding Help</a></li>
+<li class="chapter" data-level="17.2" data-path="troubleshooting.html"><a href="troubleshooting.html#debugging"><i class="fa fa-check"></i><b>17.2</b> Debugging</a></li>
+</ul></li>
+<li class="chapter" data-level="18" data-path="workstation.html"><a href="workstation.html"><i class="fa fa-check"></i><b>18</b> Workstation</a>
+<ul>
+<li class="chapter" data-level="18.1" data-path="workstation.html"><a href="workstation.html#workstation-required"><i class="fa fa-check"></i><b>18.1</b> Required Installation</a>
+<ul>
+<li class="chapter" data-level="18.1.1" data-path="workstation.html"><a href="workstation.html#workstation-r"><i class="fa fa-check"></i><b>18.1.1</b> R</a></li>
+<li class="chapter" data-level="18.1.2" data-path="workstation.html"><a href="workstation.html#workstation-rstudio"><i class="fa fa-check"></i><b>18.1.2</b> RStudio</a></li>
+<li class="chapter" data-level="18.1.3" data-path="workstation.html"><a href="workstation.html#workstation-r-package-installation"><i class="fa fa-check"></i><b>18.1.3</b> Installing R Packages</a></li>
+<li class="chapter" data-level="18.1.4" data-path="workstation.html"><a href="workstation.html#workstation-r-package-update"><i class="fa fa-check"></i><b>18.1.4</b> Updating R Packages</a></li>
+<li class="chapter" data-level="18.1.5" data-path="workstation.html"><a href="workstation.html#workstation-github"><i class="fa fa-check"></i><b>18.1.5</b> GitHub</a></li>
+<li class="chapter" data-level="18.1.6" data-path="workstation.html"><a href="workstation.html#workstation-github-client"><i class="fa fa-check"></i><b>18.1.6</b> GitHub Desktop</a></li>
+<li class="chapter" data-level="18.1.7" data-path="workstation.html"><a href="workstation.html#workstation-rtools"><i class="fa fa-check"></i><b>18.1.7</b> R Tools</a></li>
+</ul></li>
+<li class="chapter" data-level="18.2" data-path="workstation.html"><a href="workstation.html#workstation-recommended"><i class="fa fa-check"></i><b>18.2</b> Recommended Installation</a>
+<ul>
+<li class="chapter" data-level="18.2.1" data-path="workstation.html"><a href="workstation.html#workstation-odbc"><i class="fa fa-check"></i><b>18.2.1</b> ODBC Driver</a></li>
+<li class="chapter" data-level="18.2.2" data-path="workstation.html"><a href="workstation.html#workstation-notepadpp"><i class="fa fa-check"></i><b>18.2.2</b> Notepad++</a></li>
+<li class="chapter" data-level="18.2.3" data-path="workstation.html"><a href="workstation.html#workstation-ads"><i class="fa fa-check"></i><b>18.2.3</b> Azure Data Studio</a></li>
+<li class="chapter" data-level="18.2.4" data-path="workstation.html"><a href="workstation.html#workstation-vscode"><i class="fa fa-check"></i><b>18.2.4</b> Visual Studio Code</a></li>
+</ul></li>
+<li class="chapter" data-level="18.3" data-path="workstation.html"><a href="workstation.html#workstation-optional"><i class="fa fa-check"></i><b>18.3</b> Optional Installation</a>
+<ul>
+<li class="chapter" data-level="18.3.1" data-path="workstation.html"><a href="workstation.html#workstation-git"><i class="fa fa-check"></i><b>18.3.1</b> Git</a></li>
+<li class="chapter" data-level="18.3.2" data-path="workstation.html"><a href="workstation.html#workstation-calc"><i class="fa fa-check"></i><b>18.3.2</b> LibreOffice Calc</a></li>
+<li class="chapter" data-level="18.3.3" data-path="workstation.html"><a href="workstation.html#workstation-pandoc"><i class="fa fa-check"></i><b>18.3.3</b> pandoc</a></li>
+<li class="chapter" data-level="18.3.4" data-path="workstation.html"><a href="workstation.html#workstation-python"><i class="fa fa-check"></i><b>18.3.4</b> Python</a></li>
+</ul></li>
+<li class="chapter" data-level="18.4" data-path="workstation.html"><a href="workstation.html#workstation-assets"><i class="fa fa-check"></i><b>18.4</b> Asset Locations</a></li>
+<li class="chapter" data-level="18.5" data-path="workstation.html"><a href="workstation.html#workstation-administrator"><i class="fa fa-check"></i><b>18.5</b> Administrator Installation</a>
+<ul>
+<li class="chapter" data-level="18.5.1" data-path="workstation.html"><a href="workstation.html#workstation-mysql"><i class="fa fa-check"></i><b>18.5.1</b> MySQL Workbench</a></li>
+<li class="chapter" data-level="18.5.2" data-path="workstation.html"><a href="workstation.html#workstation-postman"><i class="fa fa-check"></i><b>18.5.2</b> Postman</a></li>
+<li class="chapter" data-level="18.5.3" data-path="workstation.html"><a href="workstation.html#workstation-ssms"><i class="fa fa-check"></i><b>18.5.3</b> SQL Server Management Studio (SSMS)</a></li>
+<li class="chapter" data-level="18.5.4" data-path="workstation.html"><a href="workstation.html#workstation-winscp"><i class="fa fa-check"></i><b>18.5.4</b> WinSCP</a></li>
+</ul></li>
+<li class="chapter" data-level="18.6" data-path="workstation.html"><a href="workstation.html#workstation-troubleshooting"><i class="fa fa-check"></i><b>18.6</b> Installation Troubleshooting</a></li>
+<li class="chapter" data-level="18.7" data-path="workstation.html"><a href="workstation.html#workstation-ubuntu"><i class="fa fa-check"></i><b>18.7</b> Ubuntu Installation</a></li>
+<li class="chapter" data-level="18.8" data-path="workstation.html"><a href="workstation.html#workstation-retired"><i class="fa fa-check"></i><b>18.8</b> Retired Tools</a></li>
+</ul></li>
+<li class="chapter" data-level="19" data-path="tools.html"><a href="tools.html"><i class="fa fa-check"></i><b>19</b> Considerations when Selecting Tools</a>
+<ul>
+<li class="chapter" data-level="19.1" data-path="tools.html"><a href="tools.html#general"><i class="fa fa-check"></i><b>19.1</b> General</a>
+<ul>
+<li class="chapter" data-level="19.1.1" data-path="tools.html"><a href="tools.html#the-components-goal"><i class="fa fa-check"></i><b>19.1.1</b> The Component’s Goal</a></li>
+<li class="chapter" data-level="19.1.2" data-path="tools.html"><a href="tools.html#current-skillset-of-team"><i class="fa fa-check"></i><b>19.1.2</b> Current Skillset of Team</a></li>
+<li class="chapter" data-level="19.1.3" data-path="tools.html"><a href="tools.html#desired-future-skillset-of-team"><i class="fa fa-check"></i><b>19.1.3</b> Desired Future Skillset of Team</a></li>
+<li class="chapter" data-level="19.1.4" data-path="tools.html"><a href="tools.html#skillset-of-audience"><i class="fa fa-check"></i><b>19.1.4</b> Skillset of Audience</a></li>
+</ul></li>
+<li class="chapter" data-level="19.2" data-path="tools.html"><a href="tools.html#languages"><i class="fa fa-check"></i><b>19.2</b> Languages</a></li>
+<li class="chapter" data-level="19.3" data-path="tools.html"><a href="tools.html#r-packages"><i class="fa fa-check"></i><b>19.3</b> R Packages</a></li>
+<li class="chapter" data-level="19.4" data-path="tools.html"><a href="tools.html#database"><i class="fa fa-check"></i><b>19.4</b> Database</a></li>
+<li class="chapter" data-level="19.5" data-path="tools.html"><a href="tools.html#additional-resources-2"><i class="fa fa-check"></i><b>19.5</b> Additional Resources</a></li>
+</ul></li>
+<li class="chapter" data-level="20" data-path="team.html"><a href="team.html"><i class="fa fa-check"></i><b>20</b> Growing a Team</a>
+<ul>
+<li class="chapter" data-level="20.1" data-path="team.html"><a href="team.html#recruiting"><i class="fa fa-check"></i><b>20.1</b> Recruiting</a></li>
+<li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
+<li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
+</ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
+<li class="appendix"><span><b>Appendix</b></span></li>
+<li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
+<ul>
+<li class="chapter" data-level="A.1" data-path="git.html"><a href="git.html#git-code"><i class="fa fa-check"></i><b>A.1</b> for Code Development</a></li>
+<li class="chapter" data-level="A.2" data-path="git.html"><a href="git.html#git-collaboration"><i class="fa fa-check"></i><b>A.2</b> for Collaboration</a></li>
+<li class="chapter" data-level="A.3" data-path="git.html"><a href="git.html#git-stability"><i class="fa fa-check"></i><b>A.3</b> for Stability</a></li>
+<li class="chapter" data-level="A.4" data-path="git.html"><a href="git.html#git-collaborators"><i class="fa fa-check"></i><b>A.4</b> for New Collaborators</a></li>
+<li class="chapter" data-level="A.5" data-path="git.html"><a href="git.html#git-contribution"><i class="fa fa-check"></i><b>A.5</b> Steps for Contributing to Repo</a>
+<ul>
+<li class="chapter" data-level="A.5.1" data-path="git.html"><a href="git.html#git-contribution-regular"><i class="fa fa-check"></i><b>A.5.1</b> Regular Contributions</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="B" data-path="snippets.html"><a href="snippets.html"><i class="fa fa-check"></i><b>B</b> Snippets</a>
+<ul>
+<li class="chapter" data-level="B.1" data-path="snippets.html"><a href="snippets.html#snippets-reading"><i class="fa fa-check"></i><b>B.1</b> Reading External Data</a>
+<ul>
+<li class="chapter" data-level="B.1.1" data-path="snippets.html"><a href="snippets.html#snippets-reading-excel"><i class="fa fa-check"></i><b>B.1.1</b> Reading from Excel</a></li>
+<li class="chapter" data-level="B.1.2" data-path="snippets.html"><a href="snippets.html#snippets-reading-trailing-comma"><i class="fa fa-check"></i><b>B.1.2</b> Removing Trailing Comma from Header</a></li>
+<li class="chapter" data-level="B.1.3" data-path="snippets.html"><a href="snippets.html#snippets-reading-vroom"><i class="fa fa-check"></i><b>B.1.3</b> Removing Trailing Comma from Header</a></li>
+</ul></li>
+<li class="chapter" data-level="B.2" data-path="snippets.html"><a href="snippets.html#snippets-grooming"><i class="fa fa-check"></i><b>B.2</b> Grooming</a>
+<ul>
+<li class="chapter" data-level="B.2.1" data-path="snippets.html"><a href="snippets.html#snippets-grooming-two-year"><i class="fa fa-check"></i><b>B.2.1</b> Correct for misinterpreted two-digit year</a></li>
+</ul></li>
+<li class="chapter" data-level="B.3" data-path="snippets.html"><a href="snippets.html#snippets-identification"><i class="fa fa-check"></i><b>B.3</b> Identification</a>
+<ul>
+<li class="chapter" data-level="B.3.1" data-path="snippets.html"><a href="snippets.html#snippets-identification-tags"><i class="fa fa-check"></i><b>B.3.1</b> Generating “tags”</a></li>
+</ul></li>
+<li class="chapter" data-level="B.4" data-path="snippets.html"><a href="snippets.html#snippets-correspondence"><i class="fa fa-check"></i><b>B.4</b> Correspondence with Collaborators</a>
+<ul>
+<li class="chapter" data-level="B.4.1" data-path="snippets.html"><a href="snippets.html#snippets-correspondence-excel"><i class="fa fa-check"></i><b>B.4.1</b> Excel files</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="C" data-path="presentations.html"><a href="presentations.html"><i class="fa fa-check"></i><b>C</b> Presentations</a>
+<ul>
+<li class="chapter" data-level="C.1" data-path="presentations.html"><a href="presentations.html#cdw"><i class="fa fa-check"></i><b>C.1</b> CDW</a></li>
+<li class="chapter" data-level="C.2" data-path="presentations.html"><a href="presentations.html#redcap"><i class="fa fa-check"></i><b>C.2</b> REDCap</a></li>
+<li class="chapter" data-level="C.3" data-path="presentations.html"><a href="presentations.html#reproducible-research-visualization"><i class="fa fa-check"></i><b>C.3</b> Reproducible Research &amp; Visualization</a></li>
+<li class="chapter" data-level="C.4" data-path="presentations.html"><a href="presentations.html#data-management"><i class="fa fa-check"></i><b>C.4</b> Data Management</a></li>
+<li class="chapter" data-level="C.5" data-path="presentations.html"><a href="presentations.html#github-1"><i class="fa fa-check"></i><b>C.5</b> GitHub</a></li>
+<li class="chapter" data-level="C.6" data-path="presentations.html"><a href="presentations.html#software"><i class="fa fa-check"></i><b>C.6</b> Software</a></li>
+<li class="chapter" data-level="C.7" data-path="presentations.html"><a href="presentations.html#architectures"><i class="fa fa-check"></i><b>C.7</b> Architectures</a></li>
+<li class="chapter" data-level="C.8" data-path="presentations.html"><a href="presentations.html#components"><i class="fa fa-check"></i><b>C.8</b> Components</a></li>
+</ul></li>
+<li class="chapter" data-level="D" data-path="scratch-pad.html"><a href="scratch-pad.html"><i class="fa fa-check"></i><b>D</b> Scratch Pad of Loose Ideas</a>
+<ul>
+<li class="chapter" data-level="D.1" data-path="scratch-pad.html"><a href="scratch-pad.html#chapters-sections-to-form"><i class="fa fa-check"></i><b>D.1</b> Chapters &amp; Sections to Form</a></li>
+<li class="chapter" data-level="D.2" data-path="scratch-pad.html"><a href="scratch-pad.html#practices"><i class="fa fa-check"></i><b>D.2</b> Practices</a></li>
+<li class="chapter" data-level="D.3" data-path="scratch-pad.html"><a href="scratch-pad.html#good-sites"><i class="fa fa-check"></i><b>D.3</b> Good Sites</a></li>
+</ul></li>
+<li class="chapter" data-level="E" data-path="example-dashboard.html"><a href="example-dashboard.html"><i class="fa fa-check"></i><b>E</b> Example Dashboard</a>
+<ul>
+<li class="chapter" data-level="E.1" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-example"><i class="fa fa-check"></i><b>E.1</b> Example</a></li>
+<li class="chapter" data-level="E.2" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-guide"><i class="fa fa-check"></i><b>E.2</b> Style Guide</a>
+<ul>
+<li class="chapter" data-level="E.2.1" data-path="example-dashboard.html"><a href="example-dashboard.html#headline-page"><i class="fa fa-check"></i><b>E.2.1</b> Headline page</a></li>
+<li class="chapter" data-level="E.2.2" data-path="example-dashboard.html"><a href="example-dashboard.html#tables-page"><i class="fa fa-check"></i><b>E.2.2</b> Tables page</a></li>
+<li class="chapter" data-level="E.2.3" data-path="example-dashboard.html"><a href="example-dashboard.html#graphs-page"><i class="fa fa-check"></i><b>E.2.3</b> Graphs page</a></li>
+<li class="chapter" data-level="E.2.4" data-path="example-dashboard.html"><a href="example-dashboard.html#marginal-graphs-page"><i class="fa fa-check"></i><b>E.2.4</b> Marginal Graphs page</a></li>
+<li class="chapter" data-level="E.2.5" data-path="example-dashboard.html"><a href="example-dashboard.html#documentation-page"><i class="fa fa-check"></i><b>E.2.5</b> Documentation page</a></li>
+<li class="chapter" data-level="E.2.6" data-path="example-dashboard.html"><a href="example-dashboard.html#miscellaneous-notes"><i class="fa fa-check"></i><b>E.2.6</b> Miscellaneous Notes</a></li>
+</ul></li>
+<li class="chapter" data-level="E.3" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-architecture"><i class="fa fa-check"></i><b>E.3</b> Architecture</a>
+<ul>
+<li class="chapter" data-level="E.3.1" data-path="example-dashboard.html"><a href="example-dashboard.html#data-from-external-system"><i class="fa fa-check"></i><b>E.3.1</b> Data from External System</a></li>
+<li class="chapter" data-level="E.3.2" data-path="example-dashboard.html"><a href="example-dashboard.html#groomed-data-in-warehouse"><i class="fa fa-check"></i><b>E.3.2</b> Groomed Data in Warehouse</a></li>
+<li class="chapter" data-level="E.3.3" data-path="example-dashboard.html"><a href="example-dashboard.html#analysis-ready-dataset"><i class="fa fa-check"></i><b>E.3.3</b> Analysis-Ready Dataset</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="F" data-path="example-chapter.html"><a href="example-chapter.html"><i class="fa fa-check"></i><b>F</b> Example Chapter</a></li>
+<li class="chapter" data-level="G" data-path="acknowledgements.html"><a href="acknowledgements.html"><i class="fa fa-check"></i><b>G</b> Acknowledgements</a></li>
+<li class="chapter" data-level="H" data-path="references.html"><a href="references.html"><i class="fa fa-check"></i><b>H</b> References</a></li>
+<li class="divider"></li>
+<li><a href="https://github.com/rstudio/bookdown" target="_blank" rel="noopener">Published with bookdown</a></li>
+
+</ul>
+
+      </nav>
+    </div>
+
+    <div class="book-body"> <!-- page pane: header bar, chapter content, then the previous/next arrows -->
+      <div class="body-inner">
+        <div class="book-header" role="navigation"> <!-- header bar: spinner icon plus the book title linking to ./ -->
+          <h1>
+            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Collaborative Data Science Practices</a>
+          </h1>
+        </div>
+
+        <div class="page-wrapper" tabindex="-1" role="main"> <!-- main landmark wrapping the chapter text -->
+          <div class="page-inner">
+
+            <section class="normal" id="section-">
+<div id="redcap-admin" class="section level1" number="23"> <!-- chapter 23 container; it currently holds only the heading, no body text -->
+<h1><span class="header-section-number">Chapter 23</span> Material for REDCap Admins</h1>
+
+</div>
+
+
+
+            </section>
+
+          </div>
+        </div>
+      </div>
+<a href="redcap-developer.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a> <!-- previous in book order: chapter 22 -->
+<a href="git.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a> <!-- next in book order: appendix A -->
+    </div>
+  </div>
+<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
+<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
+<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
+<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
+<script>
+gitbook.require(["gitbook"], function(gitbook) { // request the "gitbook" module; it is passed to this callback
+gitbook.start({ // settings object for this page (redcap-admin)
+"sharing": { // one boolean flag per sharing service
+"github": false,
+"facebook": true,
+"twitter": true,
+"linkedin": false,
+"weibo": false,
+"instapaper": false,
+"vk": false,
+"whatsapp": false,
+"all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"] // NOTE(review): presumably the services listed in the share menu; confirm in plugin-sharing.js
+},
+"fontsettings": {
+"theme": "white",
+"family": "sans",
+"size": 2
+},
+"edit": { // link labelled "Edit" pointing at this chapter's Markdown source on GitHub (master branch)
+"link": "https://github.com/ouhscbbmc/data-science-practices-1/edit/master/ch-redcap-admin.md",
+"text": "Edit"
+},
+"history": { // no link or label configured
+"link": null,
+"text": null
+},
+"view": { // no link or label configured
+"link": null,
+"text": null
+},
+"download": ["data-science-practices-1.pdf", "data-science-practices-1.epub"], // PDF and EPUB file names, relative to this page
+"toc": {
+"collapse": "subsection"
+}
+});
+});
+</script>
+
+</body>
+
+</html>
diff --git a/docs/redcap-developer.html b/docs/redcap-developer.html
new file mode 100644
index 0000000..e2a09d1
--- /dev/null
+++ b/docs/redcap-developer.html
@@ -0,0 +1,621 @@
+<!DOCTYPE html>
+<html lang="en" xml:lang="en">
+<head>
+
+  <meta charset="utf-8" />
+  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+  <title>Chapter 22 Material for REDCap Developers | Collaborative Data Science Practices</title>
+  <meta name="description" content="Collection of publicly available practices of data science and analysis." />
+  <meta name="generator" content="bookdown 0.22 and GitBook 2.6.7" />
+
+  <meta property="og:title" content="Chapter 22 Material for REDCap Developers | Collaborative Data Science Practices" />
+  <meta property="og:type" content="book" />
+  
+  
+  <meta property="og:description" content="Collection of publicly available practices of data science and analysis." />
+  
+
+  <meta name="twitter:card" content="summary" />
+  <meta name="twitter:title" content="Chapter 22 Material for REDCap Developers | Collaborative Data Science Practices" />
+  
+  <meta name="twitter:description" content="Collection of publicly available practices of data science and analysis." />
+  
+
+<meta name="author" content="Will Beasley" />
+
+
+<meta name="date" content="2021-06-08" />
+
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <meta name="apple-mobile-web-app-capable" content="yes" />
+  <meta name="apple-mobile-web-app-status-bar-style" content="black" />
+  
+  
+<link rel="prev" href="redcap-user.html"/>
+<link rel="next" href="redcap-admin.html"/>
+<script src="libs/header-attrs-2.8/header-attrs.js"></script>
+<script src="libs/jquery-2.2.3/jquery.min.js"></script>
+<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
+
+
+
+
+
+
+
+
+
+<link href="libs/anchor-sections-1.0.1/anchor-sections.css" rel="stylesheet" />
+<script src="libs/anchor-sections-1.0.1/anchor-sections.js"></script>
+
+
+<style type="text/css">
+pre > code.sourceCode { white-space: pre; position: relative; } /* keep code whitespace exactly as written */
+pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
+pre > code.sourceCode > span:empty { height: 1.2em; } /* give empty line spans a height so they do not collapse */
+.sourceCode { overflow: visible; }
+code.sourceCode > span { color: inherit; text-decoration: inherit; }
+pre.sourceCode { margin: 0; }
+@media screen { /* on screen: let wide code blocks scroll */
+div.sourceCode { overflow: auto; }
+}
+@media print { /* in print: wrap long lines, with continuation lines indented 5em */
+pre > code.sourceCode { white-space: pre-wrap; }
+pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+}
+pre.numberSource code
+  { counter-reset: source-line 0; } /* restart the line counter for each numbered block */
+pre.numberSource code > span
+  { position: relative; left: -4em; counter-increment: source-line; } /* each line span advances the counter */
+pre.numberSource code > span > a:first-child::before
+  { content: counter(source-line); /* print the current line number before the line's first anchor */
+    position: relative; left: -1em; text-align: right; vertical-align: baseline;
+    border: none; display: inline-block;
+    -webkit-touch-callout: none; -webkit-user-select: none; /* keep line numbers out of text selection */
+    -khtml-user-select: none; -moz-user-select: none;
+    -ms-user-select: none; user-select: none;
+    padding: 0 4px; width: 4em;
+    color: #aaaaaa;
+  }
+pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
+div.sourceCode
+  {   }
+@media screen {
+pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+}
+code span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code span.at { color: #7d9029; } /* Attribute */
+code span.bn { color: #40a070; } /* BaseN */
+code span.bu { } /* BuiltIn */
+code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code span.ch { color: #4070a0; } /* Char */
+code span.cn { color: #880000; } /* Constant */
+code span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code span.dt { color: #902000; } /* DataType */
+code span.dv { color: #40a070; } /* DecVal */
+code span.er { color: #ff0000; font-weight: bold; } /* Error */
+code span.ex { } /* Extension */
+code span.fl { color: #40a070; } /* Float */
+code span.fu { color: #06287e; } /* Function */
+code span.im { } /* Import */
+code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+code span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code span.op { color: #666666; } /* Operator */
+code span.ot { color: #007020; } /* Other */
+code span.pp { color: #bc7a00; } /* Preprocessor */
+code span.sc { color: #4070a0; } /* SpecialChar */
+code span.ss { color: #bb6688; } /* SpecialString */
+code span.st { color: #4070a0; } /* String */
+code span.va { color: #19177c; } /* Variable */
+code span.vs { color: #4070a0; } /* VerbatimString */
+code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+</style>
+
+<style type="text/css">
+/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
+div.csl-bib-body { }
+div.csl-entry { /* each entry starts below any floated content */
+  clear: both;
+}
+.hanging div.csl-entry { /* hanging indent: first line flush left, following lines indented 2em */
+  margin-left:2em;
+  text-indent:-2em;
+}
+div.csl-left-margin { /* floated left, at least 2em wide */
+  min-width:2em;
+  float:left;
+}
+div.csl-right-inline { /* offset to the right: 2em margin plus 1em padding */
+  margin-left:2em;
+  padding-left:1em;
+}
+div.csl-indent {
+  margin-left: 2em;
+}
+</style>
+
+<link rel="stylesheet" href="style.css" type="text/css" />
+</head>
+
+<body>
+
+
+
+  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
+
+    <div class="book-summary">
+      <nav role="navigation">
+
+<ul class="summary">
+<li><a href="./">Collaborative Data Science</a></li>
+
+<li class="divider"></li>
+<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Introduction</a></li>
+<li class="chapter" data-level="2" data-path="coding.html"><a href="coding.html"><i class="fa fa-check"></i><b>2</b> Coding Principles</a>
+<ul>
+<li class="chapter" data-level="2.1" data-path="coding.html"><a href="coding.html#coding-simplify"><i class="fa fa-check"></i><b>2.1</b> Simplify</a>
+<ul>
+<li class="chapter" data-level="2.1.1" data-path="coding.html"><a href="coding.html#coding-simplify-types"><i class="fa fa-check"></i><b>2.1.1</b> Data Types</a></li>
+<li class="chapter" data-level="2.1.2" data-path="coding.html"><a href="coding.html#coding-simplify-categorical"><i class="fa fa-check"></i><b>2.1.2</b> Categorical Levels</a></li>
+<li class="chapter" data-level="2.1.3" data-path="coding.html"><a href="coding.html#coding-simplify-recoding"><i class="fa fa-check"></i><b>2.1.3</b> Recoding</a></li>
+</ul></li>
+<li class="chapter" data-level="2.2" data-path="coding.html"><a href="coding.html#coding-defensive"><i class="fa fa-check"></i><b>2.2</b> Defensive Style</a>
+<ul>
+<li class="chapter" data-level="2.2.1" data-path="coding.html"><a href="coding.html#coding-defensive-qualify-functions"><i class="fa fa-check"></i><b>2.2.1</b> Qualify functions</a></li>
+<li class="chapter" data-level="2.2.2" data-path="coding.html"><a href="coding.html#coding-defensive-date-arithmetic"><i class="fa fa-check"></i><b>2.2.2</b> Date Arithmetic</a></li>
+<li class="chapter" data-level="2.2.3" data-path="coding.html"><a href="coding.html#excluding-bad-cases"><i class="fa fa-check"></i><b>2.2.3</b> Excluding Bad Cases</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="3" data-path="architecture.html"><a href="architecture.html"><i class="fa fa-check"></i><b>3</b> Architecture Principles</a>
+<ul>
+<li class="chapter" data-level="3.1" data-path="architecture.html"><a href="architecture.html#encapsulation"><i class="fa fa-check"></i><b>3.1</b> Encapsulation</a></li>
+<li class="chapter" data-level="3.2" data-path="architecture.html"><a href="architecture.html#leverage-team-members-strengths-avoid-weaknesses"><i class="fa fa-check"></i><b>3.2</b> Leverage team member’s strengths &amp; avoid weaknesses</a>
+<ul>
+<li class="chapter" data-level="3.2.1" data-path="architecture.html"><a href="architecture.html#focused-code-files"><i class="fa fa-check"></i><b>3.2.1</b> Focused code files</a></li>
+<li class="chapter" data-level="3.2.2" data-path="architecture.html"><a href="architecture.html#metadata-for-content-experts"><i class="fa fa-check"></i><b>3.2.2</b> Metadata for content experts</a></li>
+</ul></li>
+<li class="chapter" data-level="3.3" data-path="architecture.html"><a href="architecture.html#scales"><i class="fa fa-check"></i><b>3.3</b> Scales</a>
+<ul>
+<li class="chapter" data-level="3.3.1" data-path="architecture.html"><a href="architecture.html#single-source-single-analysis"><i class="fa fa-check"></i><b>3.3.1</b> Single source &amp; single analysis</a></li>
+<li class="chapter" data-level="3.3.2" data-path="architecture.html"><a href="architecture.html#multiple-sources-multiple-analyses"><i class="fa fa-check"></i><b>3.3.2</b> Multiple sources &amp; multiple analyses</a></li>
+</ul></li>
+<li class="chapter" data-level="3.4" data-path="architecture.html"><a href="architecture.html#architecture-consistency"><i class="fa fa-check"></i><b>3.4</b> Consistency</a>
+<ul>
+<li class="chapter" data-level="3.4.1" data-path="architecture.html"><a href="architecture.html#consistency-files"><i class="fa fa-check"></i><b>3.4.1</b> Across Files</a></li>
+<li class="chapter" data-level="3.4.2" data-path="architecture.html"><a href="architecture.html#across-languages"><i class="fa fa-check"></i><b>3.4.2</b> Across Languages</a></li>
+<li class="chapter" data-level="3.4.3" data-path="architecture.html"><a href="architecture.html#across-projects"><i class="fa fa-check"></i><b>3.4.3</b> Across Projects</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="4" data-path="file-prototype-r.html"><a href="file-prototype-r.html"><i class="fa fa-check"></i><b>4</b> Prototypical R File</a>
+<ul>
+<li class="chapter" data-level="4.1" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-clear"><i class="fa fa-check"></i><b>4.1</b> Clear Memory</a></li>
+<li class="chapter" data-level="4.2" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-sources"><i class="fa fa-check"></i><b>4.2</b> Load Sources</a></li>
+<li class="chapter" data-level="4.3" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-packages"><i class="fa fa-check"></i><b>4.3</b> Load Packages</a></li>
+<li class="chapter" data-level="4.4" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-declare"><i class="fa fa-check"></i><b>4.4</b> Declare Globals</a></li>
+<li class="chapter" data-level="4.5" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-data"><i class="fa fa-check"></i><b>4.5</b> Load Data</a></li>
+<li class="chapter" data-level="4.6" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-tweak-data"><i class="fa fa-check"></i><b>4.6</b> Tweak Data</a></li>
+<li class="chapter" data-level="4.7" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-unique"><i class="fa fa-check"></i><b>4.7</b> (Unique Content)</a></li>
+<li class="chapter" data-level="4.8" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-verify-values"><i class="fa fa-check"></i><b>4.8</b> Verify Values</a></li>
+<li class="chapter" data-level="4.9" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-specify-columns"><i class="fa fa-check"></i><b>4.9</b> Specify Output Columns</a></li>
+<li class="chapter" data-level="4.10" data-path="file-prototype-r.html"><a href="file-prototype-r.html#save-to-disk-or-database"><i class="fa fa-check"></i><b>4.10</b> Save to Disk or Database</a></li>
+<li class="chapter" data-level="4.11" data-path="file-prototype-r.html"><a href="file-prototype-r.html#additional-resources"><i class="fa fa-check"></i><b>4.11</b> Additional Resources</a></li>
+</ul></li>
+<li class="chapter" data-level="5" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html"><i class="fa fa-check"></i><b>5</b> Prototypical SQL File</a>
+<ul>
+<li class="chapter" data-level="5.1" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-choice"><i class="fa fa-check"></i><b>5.1</b> Choice of Database Engine</a></li>
+<li class="chapter" data-level="5.2" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-ferry"><i class="fa fa-check"></i><b>5.2</b> Ferry</a></li>
+<li class="chapter" data-level="5.3" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-default-database"><i class="fa fa-check"></i><b>5.3</b> Default Databases</a></li>
+<li class="chapter" data-level="5.4" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-declare"><i class="fa fa-check"></i><b>5.4</b> Declare Values Databases</a></li>
+<li class="chapter" data-level="5.5" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-recreate"><i class="fa fa-check"></i><b>5.5</b> Recreate Table</a></li>
+<li class="chapter" data-level="5.6" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-truncate"><i class="fa fa-check"></i><b>5.6</b> Truncate Table</a></li>
+<li class="chapter" data-level="5.7" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-insert"><i class="fa fa-check"></i><b>5.7</b> INSERT INTO</a></li>
+<li class="chapter" data-level="5.8" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-select"><i class="fa fa-check"></i><b>5.8</b> SELECT</a></li>
+<li class="chapter" data-level="5.9" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-from"><i class="fa fa-check"></i><b>5.9</b> FROM</a></li>
+<li class="chapter" data-level="5.10" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-where"><i class="fa fa-check"></i><b>5.10</b> WHERE</a></li>
+<li class="chapter" data-level="5.11" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-order-by"><i class="fa fa-check"></i><b>5.11</b> ORDER BY</a></li>
+<li class="chapter" data-level="5.12" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-indexing"><i class="fa fa-check"></i><b>5.12</b> Indexing</a></li>
+</ul></li>
+<li class="chapter" data-level="6" data-path="repo-prototype.html"><a href="repo-prototype.html"><i class="fa fa-check"></i><b>6</b> Prototypical Repository</a>
+<ul>
+<li class="chapter" data-level="6.1" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-root"><i class="fa fa-check"></i><b>6.1</b> Root</a>
+<ul>
+<li class="chapter" data-level="6.1.1" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-config"><i class="fa fa-check"></i><b>6.1.1</b> <code>config.R</code></a></li>
+<li class="chapter" data-level="6.1.2" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-flow"><i class="fa fa-check"></i><b>6.1.2</b> <code>flow.R</code></a></li>
+<li class="chapter" data-level="6.1.3" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-readme"><i class="fa fa-check"></i><b>6.1.3</b> <code>README.md</code></a></li>
+<li class="chapter" data-level="6.1.4" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-rproj"><i class="fa fa-check"></i><b>6.1.4</b> <code>*.Rproj</code></a></li>
+</ul></li>
+<li class="chapter" data-level="6.2" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-analysis"><i class="fa fa-check"></i><b>6.2</b> Analysis</a></li>
+<li class="chapter" data-level="6.3" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-data-public"><i class="fa fa-check"></i><b>6.3</b> Data Public</a></li>
+<li class="chapter" data-level="6.4" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-data-unshared"><i class="fa fa-check"></i><b>6.4</b> Data Unshared</a></li>
+<li class="chapter" data-level="6.5" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-documentation"><i class="fa fa-check"></i><b>6.5</b> Documentation</a></li>
+<li class="chapter" data-level="6.6" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-manipulation"><i class="fa fa-check"></i><b>6.6</b> Manipulation</a></li>
+<li class="chapter" data-level="6.7" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-stitched"><i class="fa fa-check"></i><b>6.7</b> Stitched Output</a></li>
+<li class="chapter" data-level="6.8" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-utility"><i class="fa fa-check"></i><b>6.8</b> Utility</a></li>
+</ul></li>
+<li class="chapter" data-level="7" data-path="rest.html"><a href="rest.html"><i class="fa fa-check"></i><b>7</b> Data at Rest</a>
+<ul>
+<li class="chapter" data-level="7.1" data-path="rest.html"><a href="rest.html#data-states"><i class="fa fa-check"></i><b>7.1</b> Data States</a></li>
+<li class="chapter" data-level="7.2" data-path="rest.html"><a href="rest.html#data-containers"><i class="fa fa-check"></i><b>7.2</b> Data Containers</a>
+<ul>
+<li class="chapter" data-level="7.2.1" data-path="rest.html"><a href="rest.html#data-containers-csv"><i class="fa fa-check"></i><b>7.2.1</b> csv</a></li>
+<li class="chapter" data-level="7.2.2" data-path="rest.html"><a href="rest.html#data-containers-rds"><i class="fa fa-check"></i><b>7.2.2</b> rds</a></li>
+<li class="chapter" data-level="7.2.3" data-path="rest.html"><a href="rest.html#data-containers-yaml"><i class="fa fa-check"></i><b>7.2.3</b> yaml, json, and xml</a></li>
+<li class="chapter" data-level="7.2.4" data-path="rest.html"><a href="rest.html#data-containers-arrow"><i class="fa fa-check"></i><b>7.2.4</b> Arrow</a></li>
+<li class="chapter" data-level="7.2.5" data-path="rest.html"><a href="rest.html#data-containers-sqlite"><i class="fa fa-check"></i><b>7.2.5</b> SQLite</a></li>
+<li class="chapter" data-level="7.2.6" data-path="rest.html"><a href="rest.html#data-containers-database"><i class="fa fa-check"></i><b>7.2.6</b> Central Enterprise database</a></li>
+<li class="chapter" data-level="7.2.7" data-path="rest.html"><a href="rest.html#data-containers-redcap"><i class="fa fa-check"></i><b>7.2.7</b> Central REDCap database</a></li>
+<li class="chapter" data-level="7.2.8" data-path="rest.html"><a href="rest.html#data-containers-avoid"><i class="fa fa-check"></i><b>7.2.8</b> Containers to avoid</a></li>
+</ul></li>
+<li class="chapter" data-level="7.3" data-path="rest.html"><a href="rest.html#data-conventions"><i class="fa fa-check"></i><b>7.3</b> Storage Conventions</a>
+<ul>
+<li class="chapter" data-level="7.3.1" data-path="rest.html"><a href="rest.html#data-conventions-all"><i class="fa fa-check"></i><b>7.3.1</b> All Sources</a></li>
+<li class="chapter" data-level="7.3.2" data-path="rest.html"><a href="rest.html#data-conventions-text"><i class="fa fa-check"></i><b>7.3.2</b> Text</a></li>
+<li class="chapter" data-level="7.3.3" data-path="rest.html"><a href="rest.html#data-conventions-excel"><i class="fa fa-check"></i><b>7.3.3</b> Excel</a></li>
+<li class="chapter" data-level="7.3.4" data-path="rest.html"><a href="rest.html#data-conventions-meditech"><i class="fa fa-check"></i><b>7.3.4</b> Meditech</a></li>
+<li class="chapter" data-level="7.3.5" data-path="rest.html"><a href="rest.html#data-conventions-database"><i class="fa fa-check"></i><b>7.3.5</b> Databases</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="8" data-path="patterns.html"><a href="patterns.html"><i class="fa fa-check"></i><b>8</b> Patterns</a>
+<ul>
+<li class="chapter" data-level="8.1" data-path="patterns.html"><a href="patterns.html#pattern-ellis"><i class="fa fa-check"></i><b>8.1</b> Ellis</a>
+<ul>
+<li class="chapter" data-level="8.1.1" data-path="patterns.html"><a href="patterns.html#purpose"><i class="fa fa-check"></i><b>8.1.1</b> Purpose</a></li>
+<li class="chapter" data-level="8.1.2" data-path="patterns.html"><a href="patterns.html#philosophy"><i class="fa fa-check"></i><b>8.1.2</b> Philosophy</a></li>
+<li class="chapter" data-level="8.1.3" data-path="patterns.html"><a href="patterns.html#guidelines"><i class="fa fa-check"></i><b>8.1.3</b> Guidelines</a></li>
+<li class="chapter" data-level="8.1.4" data-path="patterns.html"><a href="patterns.html#examples"><i class="fa fa-check"></i><b>8.1.4</b> Examples</a></li>
+<li class="chapter" data-level="8.1.5" data-path="patterns.html"><a href="patterns.html#elements"><i class="fa fa-check"></i><b>8.1.5</b> Elements</a></li>
+</ul></li>
+<li class="chapter" data-level="8.2" data-path="patterns.html"><a href="patterns.html#pattern-arch"><i class="fa fa-check"></i><b>8.2</b> Arch</a></li>
+<li class="chapter" data-level="8.3" data-path="patterns.html"><a href="patterns.html#pattern-ferry"><i class="fa fa-check"></i><b>8.3</b> Ferry</a></li>
+<li class="chapter" data-level="8.4" data-path="patterns.html"><a href="patterns.html#pattern-scribe"><i class="fa fa-check"></i><b>8.4</b> Scribe</a></li>
+<li class="chapter" data-level="8.5" data-path="patterns.html"><a href="patterns.html#pattern-analysis"><i class="fa fa-check"></i><b>8.5</b> Analysis</a></li>
+<li class="chapter" data-level="8.6" data-path="patterns.html"><a href="patterns.html#pattern-presentation-static"><i class="fa fa-check"></i><b>8.6</b> Presentation - Static</a></li>
+<li class="chapter" data-level="8.7" data-path="patterns.html"><a href="patterns.html#pattern-presentation-interactive"><i class="fa fa-check"></i><b>8.7</b> Presentation - Interactive</a></li>
+<li class="chapter" data-level="8.8" data-path="patterns.html"><a href="patterns.html#pattern-metadata"><i class="fa fa-check"></i><b>8.8</b> Metadata</a>
+<ul>
+<li class="chapter" data-level="8.8.1" data-path="patterns.html"><a href="patterns.html#primary-rules-for-mapping"><i class="fa fa-check"></i><b>8.8.1</b> Primary Rules for Mapping</a></li>
+<li class="chapter" data-level="8.8.2" data-path="patterns.html"><a href="patterns.html#secondary-rules-for-mapping"><i class="fa fa-check"></i><b>8.8.2</b> Secondary Rules for Mapping</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="9" data-path="security.html"><a href="security.html"><i class="fa fa-check"></i><b>9</b> Security &amp; Private Data</a>
+<ul>
+<li class="chapter" data-level="9.1" data-path="security.html"><a href="security.html#security-guidelines"><i class="fa fa-check"></i><b>9.1</b> Security Guidelines</a></li>
+<li class="chapter" data-level="9.2" data-path="security.html"><a href="security.html#dataset-level-redaction"><i class="fa fa-check"></i><b>9.2</b> Dataset-level Redaction</a></li>
+<li class="chapter" data-level="9.3" data-path="security.html"><a href="security.html#security-for-data-at-rest"><i class="fa fa-check"></i><b>9.3</b> Security for Data at Rest</a></li>
+<li class="chapter" data-level="9.4" data-path="security.html"><a href="security.html#file-level-permissions"><i class="fa fa-check"></i><b>9.4</b> File-level permissions</a></li>
+<li class="chapter" data-level="9.5" data-path="security.html"><a href="security.html#database-permissions"><i class="fa fa-check"></i><b>9.5</b> Database permissions</a></li>
+<li class="chapter" data-level="9.6" data-path="security.html"><a href="security.html#public-private-repositories"><i class="fa fa-check"></i><b>9.6</b> Public &amp; Private Repositories</a>
+<ul>
+<li class="chapter" data-level="9.6.1" data-path="security.html"><a href="security.html#repo-rules"><i class="fa fa-check"></i><b>9.6.1</b> Repo Rules</a></li>
+<li class="chapter" data-level="9.6.2" data-path="security.html"><a href="security.html#scrubbing-github-history"><i class="fa fa-check"></i><b>9.6.2</b> Scrubbing GitHub history</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="10" data-path="automation.html"><a href="automation.html"><i class="fa fa-check"></i><b>10</b> Automation &amp; Reproducibility</a>
+<ul>
+<li class="chapter" data-level="10.1" data-path="automation.html"><a href="automation.html#automation-mediator"><i class="fa fa-check"></i><b>10.1</b> Mediator</a>
+<ul>
+<li class="chapter" data-level="10.1.1" data-path="automation.html"><a href="automation.html#automation-flow"><i class="fa fa-check"></i><b>10.1.1</b> Flow File in R</a></li>
+<li class="chapter" data-level="10.1.2" data-path="automation.html"><a href="automation.html#automation-makefile"><i class="fa fa-check"></i><b>10.1.2</b> Makefile</a></li>
+<li class="chapter" data-level="10.1.3" data-path="automation.html"><a href="automation.html#automation-ssis"><i class="fa fa-check"></i><b>10.1.3</b> SSIS</a></li>
+</ul></li>
+<li class="chapter" data-level="10.2" data-path="automation.html"><a href="automation.html#automation-scheduling"><i class="fa fa-check"></i><b>10.2</b> Scheduling</a>
+<ul>
+<li class="chapter" data-level="10.2.1" data-path="automation.html"><a href="automation.html#automation-cron"><i class="fa fa-check"></i><b>10.2.1</b> cron</a></li>
+<li class="chapter" data-level="10.2.2" data-path="automation.html"><a href="automation.html#automation-task-scheduler"><i class="fa fa-check"></i><b>10.2.2</b> Task Scheduler</a></li>
+<li class="chapter" data-level="10.2.3" data-path="automation.html"><a href="automation.html#automation-sql-server-agent"><i class="fa fa-check"></i><b>10.2.3</b> SQL Server Agent</a></li>
+</ul></li>
+<li class="chapter" data-level="10.3" data-path="automation.html"><a href="automation.html#auxiliary-issues"><i class="fa fa-check"></i><b>10.3</b> Auxiliary Issues</a>
+<ul>
+<li class="chapter" data-level="10.3.1" data-path="automation.html"><a href="automation.html#sink-log-files"><i class="fa fa-check"></i><b>10.3.1</b> Sink Log Files</a></li>
+<li class="chapter" data-level="10.3.2" data-path="automation.html"><a href="automation.html#package-versions"><i class="fa fa-check"></i><b>10.3.2</b> Package Versions</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="11" data-path="scaling-up.html"><a href="scaling-up.html"><i class="fa fa-check"></i><b>11</b> Scaling Up</a>
+<ul>
+<li class="chapter" data-level="11.1" data-path="scaling-up.html"><a href="scaling-up.html#data-storage"><i class="fa fa-check"></i><b>11.1</b> Data Storage</a></li>
+<li class="chapter" data-level="11.2" data-path="scaling-up.html"><a href="scaling-up.html#data-processing"><i class="fa fa-check"></i><b>11.2</b> Data Processing</a></li>
+</ul></li>
+<li class="chapter" data-level="12" data-path="collaboration.html"><a href="collaboration.html"><i class="fa fa-check"></i><b>12</b> Parallel Collaboration</a>
+<ul>
+<li class="chapter" data-level="12.1" data-path="collaboration.html"><a href="collaboration.html#social-contract"><i class="fa fa-check"></i><b>12.1</b> Social Contract</a></li>
+<li class="chapter" data-level="12.2" data-path="collaboration.html"><a href="collaboration.html#code-reviews"><i class="fa fa-check"></i><b>12.2</b> Code Reviews</a></li>
+<li class="chapter" data-level="12.3" data-path="collaboration.html"><a href="collaboration.html#remote"><i class="fa fa-check"></i><b>12.3</b> Remote</a></li>
+<li class="chapter" data-level="12.4" data-path="collaboration.html"><a href="collaboration.html#additional-resources-1"><i class="fa fa-check"></i><b>12.4</b> Additional Resources</a></li>
+<li class="chapter" data-level="12.5" data-path="collaboration.html"><a href="collaboration.html#loose-notes"><i class="fa fa-check"></i><b>12.5</b> Loose Notes</a>
+<ul>
+<li class="chapter" data-level="12.5.1" data-path="collaboration.html"><a href="collaboration.html#github"><i class="fa fa-check"></i><b>12.5.1</b> GitHub</a></li>
+<li class="chapter" data-level="12.5.2" data-path="collaboration.html"><a href="collaboration.html#common-code"><i class="fa fa-check"></i><b>12.5.2</b> Common Code</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="13" data-path="document.html"><a href="document.html"><i class="fa fa-check"></i><b>13</b> Documentation</a>
+<ul>
+<li class="chapter" data-level="13.1" data-path="document.html"><a href="document.html#team-wide"><i class="fa fa-check"></i><b>13.1</b> Team-wide</a></li>
+<li class="chapter" data-level="13.2" data-path="document.html"><a href="document.html#project-specific"><i class="fa fa-check"></i><b>13.2</b> Project-specific</a></li>
+<li class="chapter" data-level="13.3" data-path="document.html"><a href="document.html#dataset-origin-structure"><i class="fa fa-check"></i><b>13.3</b> Dataset Origin &amp; Structure</a></li>
+<li class="chapter" data-level="13.4" data-path="document.html"><a href="document.html#issues-tasks"><i class="fa fa-check"></i><b>13.4</b> Issues &amp; Tasks</a>
+<ul>
+<li class="chapter" data-level="13.4.1" data-path="document.html"><a href="document.html#documentation-issue-template"><i class="fa fa-check"></i><b>13.4.1</b> GitHub Issue Template</a></li>
+</ul></li>
+<li class="chapter" data-level="13.5" data-path="document.html"><a href="document.html#flow-diagrams"><i class="fa fa-check"></i><b>13.5</b> Flow Diagrams</a></li>
+<li class="chapter" data-level="13.6" data-path="document.html"><a href="document.html#setting-up-new-machine"><i class="fa fa-check"></i><b>13.6</b> Setting up new machine</a></li>
+</ul></li>
+<li class="chapter" data-level="14" data-path="style.html"><a href="style.html"><i class="fa fa-check"></i><b>14</b> Style Guide</a>
+<ul>
+<li class="chapter" data-level="14.1" data-path="style.html"><a href="style.html#readability"><i class="fa fa-check"></i><b>14.1</b> Readability</a>
+<ul>
+<li class="chapter" data-level="14.1.1" data-path="style.html"><a href="style.html#style-number"><i class="fa fa-check"></i><b>14.1.1</b> Number</a></li>
+<li class="chapter" data-level="14.1.2" data-path="style.html"><a href="style.html#style-abbreviation"><i class="fa fa-check"></i><b>14.1.2</b> Abbreviations</a></li>
+</ul></li>
+<li class="chapter" data-level="14.2" data-path="style.html"><a href="style.html#datasets"><i class="fa fa-check"></i><b>14.2</b> Datasets</a>
+<ul>
+<li class="chapter" data-level="14.2.1" data-path="style.html"><a href="style.html#style-filter"><i class="fa fa-check"></i><b>14.2.1</b> Filtering Rows</a></li>
+<li class="chapter" data-level="14.2.2" data-path="style.html"><a href="style.html#style-attach"><i class="fa fa-check"></i><b>14.2.2</b> Don’t attach</a></li>
+</ul></li>
+<li class="chapter" data-level="14.3" data-path="style.html"><a href="style.html#style-factor"><i class="fa fa-check"></i><b>14.3</b> Categorical Variables</a>
+<ul>
+<li class="chapter" data-level="14.3.1" data-path="style.html"><a href="style.html#style-factor-unknown"><i class="fa fa-check"></i><b>14.3.1</b> Explicit Missing Values</a></li>
+<li class="chapter" data-level="14.3.2" data-path="style.html"><a href="style.html#style-factor-granularity"><i class="fa fa-check"></i><b>14.3.2</b> Granularity</a></li>
+</ul></li>
+<li class="chapter" data-level="14.4" data-path="style.html"><a href="style.html#style-dates"><i class="fa fa-check"></i><b>14.4</b> Dates</a></li>
+<li class="chapter" data-level="14.5" data-path="style.html"><a href="style.html#naming"><i class="fa fa-check"></i><b>14.5</b> Naming</a>
+<ul>
+<li class="chapter" data-level="14.5.1" data-path="style.html"><a href="style.html#style-naming-variables"><i class="fa fa-check"></i><b>14.5.1</b> Variables</a></li>
+<li class="chapter" data-level="14.5.2" data-path="style.html"><a href="style.html#style-naming-files"><i class="fa fa-check"></i><b>14.5.2</b> Files and Folders</a></li>
+<li class="chapter" data-level="14.5.3" data-path="style.html"><a href="style.html#style-naming-datasets"><i class="fa fa-check"></i><b>14.5.3</b> Datasets</a></li>
+<li class="chapter" data-level="14.5.4" data-path="style.html"><a href="style.html#style-naming-semantic"><i class="fa fa-check"></i><b>14.5.4</b> Semantic sorting</a></li>
+</ul></li>
+<li class="chapter" data-level="14.6" data-path="style.html"><a href="style.html#style-whitespace"><i class="fa fa-check"></i><b>14.6</b> Whitespace</a></li>
+<li class="chapter" data-level="14.7" data-path="style.html"><a href="style.html#style-database"><i class="fa fa-check"></i><b>14.7</b> Database</a></li>
+<li class="chapter" data-level="14.8" data-path="style.html"><a href="style.html#style-ggplot"><i class="fa fa-check"></i><b>14.8</b> ggplot2</a>
+<ul>
+<li class="chapter" data-level="14.8.1" data-path="style.html"><a href="style.html#style-ggplot-order"><i class="fa fa-check"></i><b>14.8.1</b> Order of commands</a></li>
+<li class="chapter" data-level="14.8.2" data-path="style.html"><a href="style.html#style-ggplot-gotchas"><i class="fa fa-check"></i><b>14.8.2</b> Gotchas</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="15" data-path="publication.html"><a href="publication.html"><i class="fa fa-check"></i><b>15</b> Publishing Results</a>
+<ul>
+<li class="chapter" data-level="15.1" data-path="publication.html"><a href="publication.html#publication-analysts"><i class="fa fa-check"></i><b>15.1</b> To Other Analysts</a></li>
+<li class="chapter" data-level="15.2" data-path="publication.html"><a href="publication.html#publication-experts"><i class="fa fa-check"></i><b>15.2</b> To Researchers &amp; Content Experts</a></li>
+<li class="chapter" data-level="15.3" data-path="publication.html"><a href="publication.html#publication-phobic"><i class="fa fa-check"></i><b>15.3</b> To Technical-Phobic Audiences</a></li>
+</ul></li>
+<li class="chapter" data-level="16" data-path="testing-and-validation.html"><a href="testing-and-validation.html"><i class="fa fa-check"></i><b>16</b> Testing, Validation, &amp; Defensive Programming</a>
+<ul>
+<li class="chapter" data-level="16.1" data-path="testing-and-validation.html"><a href="testing-and-validation.html#testing-functions"><i class="fa fa-check"></i><b>16.1</b> Testing Functions</a></li>
+<li class="chapter" data-level="16.2" data-path="testing-and-validation.html"><a href="testing-and-validation.html#defensive-programming"><i class="fa fa-check"></i><b>16.2</b> Defensive Programming</a></li>
+<li class="chapter" data-level="16.3" data-path="testing-and-validation.html"><a href="testing-and-validation.html#validator"><i class="fa fa-check"></i><b>16.3</b> Validator</a></li>
+</ul></li>
+<li class="chapter" data-level="17" data-path="troubleshooting.html"><a href="troubleshooting.html"><i class="fa fa-check"></i><b>17</b> Troubleshooting and Debugging</a>
+<ul>
+<li class="chapter" data-level="17.1" data-path="troubleshooting.html"><a href="troubleshooting.html#finding-help"><i class="fa fa-check"></i><b>17.1</b> Finding Help</a></li>
+<li class="chapter" data-level="17.2" data-path="troubleshooting.html"><a href="troubleshooting.html#debugging"><i class="fa fa-check"></i><b>17.2</b> Debugging</a></li>
+</ul></li>
+<li class="chapter" data-level="18" data-path="workstation.html"><a href="workstation.html"><i class="fa fa-check"></i><b>18</b> Workstation</a>
+<ul>
+<li class="chapter" data-level="18.1" data-path="workstation.html"><a href="workstation.html#workstation-required"><i class="fa fa-check"></i><b>18.1</b> Required Installation</a>
+<ul>
+<li class="chapter" data-level="18.1.1" data-path="workstation.html"><a href="workstation.html#workstation-r"><i class="fa fa-check"></i><b>18.1.1</b> R</a></li>
+<li class="chapter" data-level="18.1.2" data-path="workstation.html"><a href="workstation.html#workstation-rstudio"><i class="fa fa-check"></i><b>18.1.2</b> RStudio</a></li>
+<li class="chapter" data-level="18.1.3" data-path="workstation.html"><a href="workstation.html#workstation-r-package-installation"><i class="fa fa-check"></i><b>18.1.3</b> Installing R Packages</a></li>
+<li class="chapter" data-level="18.1.4" data-path="workstation.html"><a href="workstation.html#workstation-r-package-update"><i class="fa fa-check"></i><b>18.1.4</b> Updating R Packages</a></li>
+<li class="chapter" data-level="18.1.5" data-path="workstation.html"><a href="workstation.html#workstation-github"><i class="fa fa-check"></i><b>18.1.5</b> GitHub</a></li>
+<li class="chapter" data-level="18.1.6" data-path="workstation.html"><a href="workstation.html#workstation-github-client"><i class="fa fa-check"></i><b>18.1.6</b> GitHub Desktop</a></li>
+<li class="chapter" data-level="18.1.7" data-path="workstation.html"><a href="workstation.html#workstation-rtools"><i class="fa fa-check"></i><b>18.1.7</b> R Tools</a></li>
+</ul></li>
+<li class="chapter" data-level="18.2" data-path="workstation.html"><a href="workstation.html#workstation-recommended"><i class="fa fa-check"></i><b>18.2</b> Recommended Installation</a>
+<ul>
+<li class="chapter" data-level="18.2.1" data-path="workstation.html"><a href="workstation.html#workstation-odbc"><i class="fa fa-check"></i><b>18.2.1</b> ODBC Driver</a></li>
+<li class="chapter" data-level="18.2.2" data-path="workstation.html"><a href="workstation.html#workstation-notepadpp"><i class="fa fa-check"></i><b>18.2.2</b> Notepad++</a></li>
+<li class="chapter" data-level="18.2.3" data-path="workstation.html"><a href="workstation.html#workstation-ads"><i class="fa fa-check"></i><b>18.2.3</b> Azure Data Studio</a></li>
+<li class="chapter" data-level="18.2.4" data-path="workstation.html"><a href="workstation.html#workstation-vscode"><i class="fa fa-check"></i><b>18.2.4</b> Visual Studio Code</a></li>
+</ul></li>
+<li class="chapter" data-level="18.3" data-path="workstation.html"><a href="workstation.html#workstation-optional"><i class="fa fa-check"></i><b>18.3</b> Optional Installation</a>
+<ul>
+<li class="chapter" data-level="18.3.1" data-path="workstation.html"><a href="workstation.html#workstation-git"><i class="fa fa-check"></i><b>18.3.1</b> Git</a></li>
+<li class="chapter" data-level="18.3.2" data-path="workstation.html"><a href="workstation.html#workstation-calc"><i class="fa fa-check"></i><b>18.3.2</b> LibreOffice Calc</a></li>
+<li class="chapter" data-level="18.3.3" data-path="workstation.html"><a href="workstation.html#workstation-pandoc"><i class="fa fa-check"></i><b>18.3.3</b> pandoc</a></li>
+<li class="chapter" data-level="18.3.4" data-path="workstation.html"><a href="workstation.html#workstation-python"><i class="fa fa-check"></i><b>18.3.4</b> Python</a></li>
+</ul></li>
+<li class="chapter" data-level="18.4" data-path="workstation.html"><a href="workstation.html#workstation-assets"><i class="fa fa-check"></i><b>18.4</b> Asset Locations</a></li>
+<li class="chapter" data-level="18.5" data-path="workstation.html"><a href="workstation.html#workstation-administrator"><i class="fa fa-check"></i><b>18.5</b> Administrator Installation</a>
+<ul>
+<li class="chapter" data-level="18.5.1" data-path="workstation.html"><a href="workstation.html#workstation-mysql"><i class="fa fa-check"></i><b>18.5.1</b> MySQL Workbench</a></li>
+<li class="chapter" data-level="18.5.2" data-path="workstation.html"><a href="workstation.html#workstation-postman"><i class="fa fa-check"></i><b>18.5.2</b> Postman</a></li>
+<li class="chapter" data-level="18.5.3" data-path="workstation.html"><a href="workstation.html#workstation-ssms"><i class="fa fa-check"></i><b>18.5.3</b> SQL Server Management Studio (SSMS)</a></li>
+<li class="chapter" data-level="18.5.4" data-path="workstation.html"><a href="workstation.html#workstation-winscp"><i class="fa fa-check"></i><b>18.5.4</b> WinSCP</a></li>
+</ul></li>
+<li class="chapter" data-level="18.6" data-path="workstation.html"><a href="workstation.html#workstation-troubleshooting"><i class="fa fa-check"></i><b>18.6</b> Installation Troubleshooting</a></li>
+<li class="chapter" data-level="18.7" data-path="workstation.html"><a href="workstation.html#workstation-ubuntu"><i class="fa fa-check"></i><b>18.7</b> Ubuntu Installation</a></li>
+<li class="chapter" data-level="18.8" data-path="workstation.html"><a href="workstation.html#workstation-retired"><i class="fa fa-check"></i><b>18.8</b> Retired Tools</a></li>
+</ul></li>
+<li class="chapter" data-level="19" data-path="tools.html"><a href="tools.html"><i class="fa fa-check"></i><b>19</b> Considerations when Selecting Tools</a>
+<ul>
+<li class="chapter" data-level="19.1" data-path="tools.html"><a href="tools.html#general"><i class="fa fa-check"></i><b>19.1</b> General</a>
+<ul>
+<li class="chapter" data-level="19.1.1" data-path="tools.html"><a href="tools.html#the-components-goal"><i class="fa fa-check"></i><b>19.1.1</b> The Component’s Goal</a></li>
+<li class="chapter" data-level="19.1.2" data-path="tools.html"><a href="tools.html#current-skillset-of-team"><i class="fa fa-check"></i><b>19.1.2</b> Current Skillset of Team</a></li>
+<li class="chapter" data-level="19.1.3" data-path="tools.html"><a href="tools.html#desired-future-skillset-of-team"><i class="fa fa-check"></i><b>19.1.3</b> Desired Future Skillset of Team</a></li>
+<li class="chapter" data-level="19.1.4" data-path="tools.html"><a href="tools.html#skillset-of-audience"><i class="fa fa-check"></i><b>19.1.4</b> Skillset of Audience</a></li>
+</ul></li>
+<li class="chapter" data-level="19.2" data-path="tools.html"><a href="tools.html#languages"><i class="fa fa-check"></i><b>19.2</b> Languages</a></li>
+<li class="chapter" data-level="19.3" data-path="tools.html"><a href="tools.html#r-packages"><i class="fa fa-check"></i><b>19.3</b> R Packages</a></li>
+<li class="chapter" data-level="19.4" data-path="tools.html"><a href="tools.html#database"><i class="fa fa-check"></i><b>19.4</b> Database</a></li>
+<li class="chapter" data-level="19.5" data-path="tools.html"><a href="tools.html#additional-resources-2"><i class="fa fa-check"></i><b>19.5</b> Additional Resources</a></li>
+</ul></li>
+<li class="chapter" data-level="20" data-path="team.html"><a href="team.html"><i class="fa fa-check"></i><b>20</b> Growing a Team</a>
+<ul>
+<li class="chapter" data-level="20.1" data-path="team.html"><a href="team.html#recruiting"><i class="fa fa-check"></i><b>20.1</b> Recruiting</a></li>
+<li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
+<li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
+</ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
+<li class="appendix"><span><b>Appendix</b></span></li>
+<li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
+<ul>
+<li class="chapter" data-level="A.1" data-path="git.html"><a href="git.html#git-code"><i class="fa fa-check"></i><b>A.1</b> for Code Development</a></li>
+<li class="chapter" data-level="A.2" data-path="git.html"><a href="git.html#git-collaboration"><i class="fa fa-check"></i><b>A.2</b> for Collaboration</a></li>
+<li class="chapter" data-level="A.3" data-path="git.html"><a href="git.html#git-stability"><i class="fa fa-check"></i><b>A.3</b> for Stability</a></li>
+<li class="chapter" data-level="A.4" data-path="git.html"><a href="git.html#git-collaborators"><i class="fa fa-check"></i><b>A.4</b> for New Collaborators</a></li>
+<li class="chapter" data-level="A.5" data-path="git.html"><a href="git.html#git-contribution"><i class="fa fa-check"></i><b>A.5</b> Steps for Contributing to Repo</a>
+<ul>
+<li class="chapter" data-level="A.5.1" data-path="git.html"><a href="git.html#git-contribution-regular"><i class="fa fa-check"></i><b>A.5.1</b> Regular Contributions</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="B" data-path="snippets.html"><a href="snippets.html"><i class="fa fa-check"></i><b>B</b> Snippets</a>
+<ul>
+<li class="chapter" data-level="B.1" data-path="snippets.html"><a href="snippets.html#snippets-reading"><i class="fa fa-check"></i><b>B.1</b> Reading External Data</a>
+<ul>
+<li class="chapter" data-level="B.1.1" data-path="snippets.html"><a href="snippets.html#snippets-reading-excel"><i class="fa fa-check"></i><b>B.1.1</b> Reading from Excel</a></li>
+<li class="chapter" data-level="B.1.2" data-path="snippets.html"><a href="snippets.html#snippets-reading-trailing-comma"><i class="fa fa-check"></i><b>B.1.2</b> Removing Trailing Comma from Header</a></li>
+<li class="chapter" data-level="B.1.3" data-path="snippets.html"><a href="snippets.html#snippets-reading-vroom"><i class="fa fa-check"></i><b>B.1.3</b> Removing Trailing Comma from Header</a></li>
+</ul></li>
+<li class="chapter" data-level="B.2" data-path="snippets.html"><a href="snippets.html#snippets-grooming"><i class="fa fa-check"></i><b>B.2</b> Grooming</a>
+<ul>
+<li class="chapter" data-level="B.2.1" data-path="snippets.html"><a href="snippets.html#snippets-grooming-two-year"><i class="fa fa-check"></i><b>B.2.1</b> Correct for misinterpreted two-digit year</a></li>
+</ul></li>
+<li class="chapter" data-level="B.3" data-path="snippets.html"><a href="snippets.html#snippets-identification"><i class="fa fa-check"></i><b>B.3</b> Identification</a>
+<ul>
+<li class="chapter" data-level="B.3.1" data-path="snippets.html"><a href="snippets.html#snippets-identification-tags"><i class="fa fa-check"></i><b>B.3.1</b> Generating “tags”</a></li>
+</ul></li>
+<li class="chapter" data-level="B.4" data-path="snippets.html"><a href="snippets.html#snippets-correspondence"><i class="fa fa-check"></i><b>B.4</b> Correspondence with Collaborators</a>
+<ul>
+<li class="chapter" data-level="B.4.1" data-path="snippets.html"><a href="snippets.html#snippets-correspondence-excel"><i class="fa fa-check"></i><b>B.4.1</b> Excel files</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="C" data-path="presentations.html"><a href="presentations.html"><i class="fa fa-check"></i><b>C</b> Presentations</a>
+<ul>
+<li class="chapter" data-level="C.1" data-path="presentations.html"><a href="presentations.html#cdw"><i class="fa fa-check"></i><b>C.1</b> CDW</a></li>
+<li class="chapter" data-level="C.2" data-path="presentations.html"><a href="presentations.html#redcap"><i class="fa fa-check"></i><b>C.2</b> REDCap</a></li>
+<li class="chapter" data-level="C.3" data-path="presentations.html"><a href="presentations.html#reproducible-research-visualization"><i class="fa fa-check"></i><b>C.3</b> Reproducible Research &amp; Visualization</a></li>
+<li class="chapter" data-level="C.4" data-path="presentations.html"><a href="presentations.html#data-management"><i class="fa fa-check"></i><b>C.4</b> Data Management</a></li>
+<li class="chapter" data-level="C.5" data-path="presentations.html"><a href="presentations.html#github-1"><i class="fa fa-check"></i><b>C.5</b> GitHub</a></li>
+<li class="chapter" data-level="C.6" data-path="presentations.html"><a href="presentations.html#software"><i class="fa fa-check"></i><b>C.6</b> Software</a></li>
+<li class="chapter" data-level="C.7" data-path="presentations.html"><a href="presentations.html#architectures"><i class="fa fa-check"></i><b>C.7</b> Architectures</a></li>
+<li class="chapter" data-level="C.8" data-path="presentations.html"><a href="presentations.html#components"><i class="fa fa-check"></i><b>C.8</b> Components</a></li>
+</ul></li>
+<li class="chapter" data-level="D" data-path="scratch-pad.html"><a href="scratch-pad.html"><i class="fa fa-check"></i><b>D</b> Scratch Pad of Loose Ideas</a>
+<ul>
+<li class="chapter" data-level="D.1" data-path="scratch-pad.html"><a href="scratch-pad.html#chapters-sections-to-form"><i class="fa fa-check"></i><b>D.1</b> Chapters &amp; Sections to Form</a></li>
+<li class="chapter" data-level="D.2" data-path="scratch-pad.html"><a href="scratch-pad.html#practices"><i class="fa fa-check"></i><b>D.2</b> Practices</a></li>
+<li class="chapter" data-level="D.3" data-path="scratch-pad.html"><a href="scratch-pad.html#good-sites"><i class="fa fa-check"></i><b>D.3</b> Good Sites</a></li>
+</ul></li>
+<li class="chapter" data-level="E" data-path="example-dashboard.html"><a href="example-dashboard.html"><i class="fa fa-check"></i><b>E</b> Example Dashboard</a>
+<ul>
+<li class="chapter" data-level="E.1" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-example"><i class="fa fa-check"></i><b>E.1</b> Example</a></li>
+<li class="chapter" data-level="E.2" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-guide"><i class="fa fa-check"></i><b>E.2</b> Style Guide</a>
+<ul>
+<li class="chapter" data-level="E.2.1" data-path="example-dashboard.html"><a href="example-dashboard.html#headline-page"><i class="fa fa-check"></i><b>E.2.1</b> Headline page</a></li>
+<li class="chapter" data-level="E.2.2" data-path="example-dashboard.html"><a href="example-dashboard.html#tables-page"><i class="fa fa-check"></i><b>E.2.2</b> Tables page</a></li>
+<li class="chapter" data-level="E.2.3" data-path="example-dashboard.html"><a href="example-dashboard.html#graphs-page"><i class="fa fa-check"></i><b>E.2.3</b> Graphs page</a></li>
+<li class="chapter" data-level="E.2.4" data-path="example-dashboard.html"><a href="example-dashboard.html#marginal-graphs-page"><i class="fa fa-check"></i><b>E.2.4</b> Marginal Graphs page</a></li>
+<li class="chapter" data-level="E.2.5" data-path="example-dashboard.html"><a href="example-dashboard.html#documentation-page"><i class="fa fa-check"></i><b>E.2.5</b> Documentation page</a></li>
+<li class="chapter" data-level="E.2.6" data-path="example-dashboard.html"><a href="example-dashboard.html#miscellaneous-notes"><i class="fa fa-check"></i><b>E.2.6</b> Miscellaneous Notes</a></li>
+</ul></li>
+<li class="chapter" data-level="E.3" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-architecture"><i class="fa fa-check"></i><b>E.3</b> Architecture</a>
+<ul>
+<li class="chapter" data-level="E.3.1" data-path="example-dashboard.html"><a href="example-dashboard.html#data-from-external-system"><i class="fa fa-check"></i><b>E.3.1</b> Data from External System</a></li>
+<li class="chapter" data-level="E.3.2" data-path="example-dashboard.html"><a href="example-dashboard.html#groomed-data-in-warehouse"><i class="fa fa-check"></i><b>E.3.2</b> Groomed Data in Warehouse</a></li>
+<li class="chapter" data-level="E.3.3" data-path="example-dashboard.html"><a href="example-dashboard.html#analysis-ready-dataset"><i class="fa fa-check"></i><b>E.3.3</b> Analysis-Ready Dataset</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="F" data-path="example-chapter.html"><a href="example-chapter.html"><i class="fa fa-check"></i><b>F</b> Example Chapter</a></li>
+<li class="chapter" data-level="G" data-path="acknowledgements.html"><a href="acknowledgements.html"><i class="fa fa-check"></i><b>G</b> Acknowledgements</a></li>
+<li class="chapter" data-level="H" data-path="references.html"><a href="references.html"><i class="fa fa-check"></i><b>H</b> References</a></li>
+<li class="divider"></li>
+<li><a href="https://github.com/rstudio/bookdown" target="_blank" rel="noopener">Published with bookdown</a></li>
+
+</ul>
+
+      </nav>
+    </div>
+
+    <div class="book-body">
+      <div class="body-inner">
+        <div class="book-header" role="navigation">
+          <h1>
+            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Collaborative Data Science Practices</a>
+          </h1>
+        </div>
+
+        <div class="page-wrapper" tabindex="-1" role="main">
+          <div class="page-inner">
+
+            <section class="normal" id="section-">
+<div id="redcap-developer" class="section level1" number="22">
+<h1><span class="header-section-number">Chapter 22</span> Material for REDCap Developers</h1>
+
+</div>
+            </section>
+
+          </div>
+        </div>
+      </div>
+<a href="redcap-user.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
+<a href="redcap-admin.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
+    </div>
+  </div>
+<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
+<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
+<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
+<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
+<script>
+gitbook.require(["gitbook"], function(gitbook) {
+gitbook.start({
+"sharing": {
+"github": false,
+"facebook": true,
+"twitter": true,
+"linkedin": false,
+"weibo": false,
+"instapaper": false,
+"vk": false,
+"whatsapp": false,
+"all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
+},
+"fontsettings": {
+"theme": "white",
+"family": "sans",
+"size": 2
+},
+"edit": {
+"link": "https://github.com/ouhscbbmc/data-science-practices-1/edit/master/ch-redcap-developer.md",
+"text": "Edit"
+},
+"history": {
+"link": null,
+"text": null
+},
+"view": {
+"link": null,
+"text": null
+},
+"download": ["data-science-practices-1.pdf", "data-science-practices-1.epub"],
+"toc": {
+"collapse": "subsection"
+}
+});
+});
+</script>
+
+</body>
+
+</html>
diff --git a/docs/redcap-user.html b/docs/redcap-user.html
new file mode 100644
index 0000000..9a2a08f
--- /dev/null
+++ b/docs/redcap-user.html
@@ -0,0 +1,628 @@
+<!DOCTYPE html>
+<html lang="en" xml:lang="en">
+<head>
+
+  <meta charset="utf-8" />
+  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+  <title>Chapter 21 Material for REDCap Users | Collaborative Data Science Practices</title>
+  <meta name="description" content="Collection of publicly available practices of data science and analysis." />
+  <meta name="generator" content="bookdown 0.22 and GitBook 2.6.7" />
+
+  <meta property="og:title" content="Chapter 21 Material for REDCap Users | Collaborative Data Science Practices" />
+  <meta property="og:type" content="book" />
+  
+  
+  <meta property="og:description" content="Collection of publicly available practices of data science and analysis." />
+  
+
+  <meta name="twitter:card" content="summary" />
+  <meta name="twitter:title" content="Chapter 21 Material for REDCap Users | Collaborative Data Science Practices" />
+  
+  <meta name="twitter:description" content="Collection of publicly available practices of data science and analysis." />
+  
+
+<meta name="author" content="Will Beasley" />
+
+
+<meta name="date" content="2021-06-08" />
+
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <meta name="apple-mobile-web-app-capable" content="yes" />
+  <meta name="apple-mobile-web-app-status-bar-style" content="black" />
+  
+  
+<link rel="prev" href="team.html"/>
+<link rel="next" href="redcap-developer.html"/>
+<script src="libs/header-attrs-2.8/header-attrs.js"></script>
+<script src="libs/jquery-2.2.3/jquery.min.js"></script>
+<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
+<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
+
+
+
+
+
+
+
+
+
+<link href="libs/anchor-sections-1.0.1/anchor-sections.css" rel="stylesheet" />
+<script src="libs/anchor-sections-1.0.1/anchor-sections.js"></script>
+
+
+<style type="text/css">
+pre > code.sourceCode { white-space: pre; position: relative; }
+pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
+pre > code.sourceCode > span:empty { height: 1.2em; }
+.sourceCode { overflow: visible; }
+code.sourceCode > span { color: inherit; text-decoration: inherit; }
+pre.sourceCode { margin: 0; }
+@media screen {
+div.sourceCode { overflow: auto; }
+}
+@media print {
+pre > code.sourceCode { white-space: pre-wrap; }
+pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+}
+pre.numberSource code
+  { counter-reset: source-line 0; }
+pre.numberSource code > span
+  { position: relative; left: -4em; counter-increment: source-line; }
+pre.numberSource code > span > a:first-child::before
+  { content: counter(source-line);
+    position: relative; left: -1em; text-align: right; vertical-align: baseline;
+    border: none; display: inline-block;
+    -webkit-touch-callout: none; -webkit-user-select: none;
+    -khtml-user-select: none; -moz-user-select: none;
+    -ms-user-select: none; user-select: none;
+    padding: 0 4px; width: 4em;
+    color: #aaaaaa;
+  }
+pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
+div.sourceCode
+  {   }
+@media screen {
+pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+}
+code span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code span.at { color: #7d9029; } /* Attribute */
+code span.bn { color: #40a070; } /* BaseN */
+code span.bu { } /* BuiltIn */
+code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code span.ch { color: #4070a0; } /* Char */
+code span.cn { color: #880000; } /* Constant */
+code span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code span.dt { color: #902000; } /* DataType */
+code span.dv { color: #40a070; } /* DecVal */
+code span.er { color: #ff0000; font-weight: bold; } /* Error */
+code span.ex { } /* Extension */
+code span.fl { color: #40a070; } /* Float */
+code span.fu { color: #06287e; } /* Function */
+code span.im { } /* Import */
+code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+code span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code span.op { color: #666666; } /* Operator */
+code span.ot { color: #007020; } /* Other */
+code span.pp { color: #bc7a00; } /* Preprocessor */
+code span.sc { color: #4070a0; } /* SpecialChar */
+code span.ss { color: #bb6688; } /* SpecialString */
+code span.st { color: #4070a0; } /* String */
+code span.va { color: #19177c; } /* Variable */
+code span.vs { color: #4070a0; } /* VerbatimString */
+code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+</style>
+
+<style type="text/css">
+/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
+div.csl-bib-body { }
+div.csl-entry {
+  clear: both;
+}
+.hanging div.csl-entry {
+  margin-left:2em;
+  text-indent:-2em;
+}
+div.csl-left-margin {
+  min-width:2em;
+  float:left;
+}
+div.csl-right-inline {
+  margin-left:2em;
+  padding-left:1em;
+}
+div.csl-indent {
+  margin-left: 2em;
+}
+</style>
+
+<link rel="stylesheet" href="style.css" type="text/css" />
+</head>
+
+<body>
+
+
+
+  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
+
+    <div class="book-summary">
+      <nav role="navigation">
+
+<ul class="summary">
+<li><a href="./">Collaborative Data Science</a></li>
+
+<li class="divider"></li>
+<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Introduction</a></li>
+<li class="chapter" data-level="2" data-path="coding.html"><a href="coding.html"><i class="fa fa-check"></i><b>2</b> Coding Principles</a>
+<ul>
+<li class="chapter" data-level="2.1" data-path="coding.html"><a href="coding.html#coding-simplify"><i class="fa fa-check"></i><b>2.1</b> Simplify</a>
+<ul>
+<li class="chapter" data-level="2.1.1" data-path="coding.html"><a href="coding.html#coding-simplify-types"><i class="fa fa-check"></i><b>2.1.1</b> Data Types</a></li>
+<li class="chapter" data-level="2.1.2" data-path="coding.html"><a href="coding.html#coding-simplify-categorical"><i class="fa fa-check"></i><b>2.1.2</b> Categorical Levels</a></li>
+<li class="chapter" data-level="2.1.3" data-path="coding.html"><a href="coding.html#coding-simplify-recoding"><i class="fa fa-check"></i><b>2.1.3</b> Recoding</a></li>
+</ul></li>
+<li class="chapter" data-level="2.2" data-path="coding.html"><a href="coding.html#coding-defensive"><i class="fa fa-check"></i><b>2.2</b> Defensive Style</a>
+<ul>
+<li class="chapter" data-level="2.2.1" data-path="coding.html"><a href="coding.html#coding-defensive-qualify-functions"><i class="fa fa-check"></i><b>2.2.1</b> Qualify functions</a></li>
+<li class="chapter" data-level="2.2.2" data-path="coding.html"><a href="coding.html#coding-defensive-date-arithmetic"><i class="fa fa-check"></i><b>2.2.2</b> Date Arithmetic</a></li>
+<li class="chapter" data-level="2.2.3" data-path="coding.html"><a href="coding.html#excluding-bad-cases"><i class="fa fa-check"></i><b>2.2.3</b> Excluding Bad Cases</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="3" data-path="architecture.html"><a href="architecture.html"><i class="fa fa-check"></i><b>3</b> Architecture Principles</a>
+<ul>
+<li class="chapter" data-level="3.1" data-path="architecture.html"><a href="architecture.html#encapsulation"><i class="fa fa-check"></i><b>3.1</b> Encapsulation</a></li>
+<li class="chapter" data-level="3.2" data-path="architecture.html"><a href="architecture.html#leverage-team-members-strengths-avoid-weaknesses"><i class="fa fa-check"></i><b>3.2</b> Leverage team member’s strengths &amp; avoid weaknesses</a>
+<ul>
+<li class="chapter" data-level="3.2.1" data-path="architecture.html"><a href="architecture.html#focused-code-files"><i class="fa fa-check"></i><b>3.2.1</b> Focused code files</a></li>
+<li class="chapter" data-level="3.2.2" data-path="architecture.html"><a href="architecture.html#metadata-for-content-experts"><i class="fa fa-check"></i><b>3.2.2</b> Metadata for content experts</a></li>
+</ul></li>
+<li class="chapter" data-level="3.3" data-path="architecture.html"><a href="architecture.html#scales"><i class="fa fa-check"></i><b>3.3</b> Scales</a>
+<ul>
+<li class="chapter" data-level="3.3.1" data-path="architecture.html"><a href="architecture.html#single-source-single-analysis"><i class="fa fa-check"></i><b>3.3.1</b> Single source &amp; single analysis</a></li>
+<li class="chapter" data-level="3.3.2" data-path="architecture.html"><a href="architecture.html#multiple-sources-multiple-analyses"><i class="fa fa-check"></i><b>3.3.2</b> Multiple sources &amp; multiple analyses</a></li>
+</ul></li>
+<li class="chapter" data-level="3.4" data-path="architecture.html"><a href="architecture.html#architecture-consistency"><i class="fa fa-check"></i><b>3.4</b> Consistency</a>
+<ul>
+<li class="chapter" data-level="3.4.1" data-path="architecture.html"><a href="architecture.html#consistency-files"><i class="fa fa-check"></i><b>3.4.1</b> Across Files</a></li>
+<li class="chapter" data-level="3.4.2" data-path="architecture.html"><a href="architecture.html#across-languages"><i class="fa fa-check"></i><b>3.4.2</b> Across Languages</a></li>
+<li class="chapter" data-level="3.4.3" data-path="architecture.html"><a href="architecture.html#across-projects"><i class="fa fa-check"></i><b>3.4.3</b> Across Projects</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="4" data-path="file-prototype-r.html"><a href="file-prototype-r.html"><i class="fa fa-check"></i><b>4</b> Prototypical R File</a>
+<ul>
+<li class="chapter" data-level="4.1" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-clear"><i class="fa fa-check"></i><b>4.1</b> Clear Memory</a></li>
+<li class="chapter" data-level="4.2" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-sources"><i class="fa fa-check"></i><b>4.2</b> Load Sources</a></li>
+<li class="chapter" data-level="4.3" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-packages"><i class="fa fa-check"></i><b>4.3</b> Load Packages</a></li>
+<li class="chapter" data-level="4.4" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-declare"><i class="fa fa-check"></i><b>4.4</b> Declare Globals</a></li>
+<li class="chapter" data-level="4.5" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-load-data"><i class="fa fa-check"></i><b>4.5</b> Load Data</a></li>
+<li class="chapter" data-level="4.6" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-tweak-data"><i class="fa fa-check"></i><b>4.6</b> Tweak Data</a></li>
+<li class="chapter" data-level="4.7" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-unique"><i class="fa fa-check"></i><b>4.7</b> (Unique Content)</a></li>
+<li class="chapter" data-level="4.8" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-verify-values"><i class="fa fa-check"></i><b>4.8</b> Verify Values</a></li>
+<li class="chapter" data-level="4.9" data-path="file-prototype-r.html"><a href="file-prototype-r.html#chunk-specify-columns"><i class="fa fa-check"></i><b>4.9</b> Specify Output Columns</a></li>
+<li class="chapter" data-level="4.10" data-path="file-prototype-r.html"><a href="file-prototype-r.html#save-to-disk-or-database"><i class="fa fa-check"></i><b>4.10</b> Save to Disk or Database</a></li>
+<li class="chapter" data-level="4.11" data-path="file-prototype-r.html"><a href="file-prototype-r.html#additional-resources"><i class="fa fa-check"></i><b>4.11</b> Additional Resources</a></li>
+</ul></li>
+<li class="chapter" data-level="5" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html"><i class="fa fa-check"></i><b>5</b> Prototypical SQL File</a>
+<ul>
+<li class="chapter" data-level="5.1" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-choice"><i class="fa fa-check"></i><b>5.1</b> Choice of Database Engine</a></li>
+<li class="chapter" data-level="5.2" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-ferry"><i class="fa fa-check"></i><b>5.2</b> Ferry</a></li>
+<li class="chapter" data-level="5.3" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-default-database"><i class="fa fa-check"></i><b>5.3</b> Default Databases</a></li>
+<li class="chapter" data-level="5.4" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-declare"><i class="fa fa-check"></i><b>5.4</b> Declare Values Databases</a></li>
+<li class="chapter" data-level="5.5" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-recreate"><i class="fa fa-check"></i><b>5.5</b> Recreate Table</a></li>
+<li class="chapter" data-level="5.6" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-truncate"><i class="fa fa-check"></i><b>5.6</b> Truncate Table</a></li>
+<li class="chapter" data-level="5.7" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-insert"><i class="fa fa-check"></i><b>5.7</b> INSERT INTO</a></li>
+<li class="chapter" data-level="5.8" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-select"><i class="fa fa-check"></i><b>5.8</b> SELECT</a></li>
+<li class="chapter" data-level="5.9" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-from"><i class="fa fa-check"></i><b>5.9</b> FROM</a></li>
+<li class="chapter" data-level="5.10" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-where"><i class="fa fa-check"></i><b>5.10</b> WHERE</a></li>
+<li class="chapter" data-level="5.11" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-order-by"><i class="fa fa-check"></i><b>5.11</b> ORDER BY</a></li>
+<li class="chapter" data-level="5.12" data-path="file-prototype-sql.html"><a href="file-prototype-sql.html#sql-indexing"><i class="fa fa-check"></i><b>5.12</b> Indexing</a></li>
+</ul></li>
+<li class="chapter" data-level="6" data-path="repo-prototype.html"><a href="repo-prototype.html"><i class="fa fa-check"></i><b>6</b> Prototypical Repository</a>
+<ul>
+<li class="chapter" data-level="6.1" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-root"><i class="fa fa-check"></i><b>6.1</b> Root</a>
+<ul>
+<li class="chapter" data-level="6.1.1" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-config"><i class="fa fa-check"></i><b>6.1.1</b> <code>config.R</code></a></li>
+<li class="chapter" data-level="6.1.2" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-flow"><i class="fa fa-check"></i><b>6.1.2</b> <code>flow.R</code></a></li>
+<li class="chapter" data-level="6.1.3" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-readme"><i class="fa fa-check"></i><b>6.1.3</b> <code>README.md</code></a></li>
+<li class="chapter" data-level="6.1.4" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-rproj"><i class="fa fa-check"></i><b>6.1.4</b> <code>*.Rproj</code></a></li>
+</ul></li>
+<li class="chapter" data-level="6.2" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-analysis"><i class="fa fa-check"></i><b>6.2</b> Analysis</a></li>
+<li class="chapter" data-level="6.3" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-data-public"><i class="fa fa-check"></i><b>6.3</b> Data Public</a></li>
+<li class="chapter" data-level="6.4" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-data-unshared"><i class="fa fa-check"></i><b>6.4</b> Data Unshared</a></li>
+<li class="chapter" data-level="6.5" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-documentation"><i class="fa fa-check"></i><b>6.5</b> Documentation</a></li>
+<li class="chapter" data-level="6.6" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-manipulation"><i class="fa fa-check"></i><b>6.6</b> Manipulation</a></li>
+<li class="chapter" data-level="6.7" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-stitched"><i class="fa fa-check"></i><b>6.7</b> Stitched Output</a></li>
+<li class="chapter" data-level="6.8" data-path="repo-prototype.html"><a href="repo-prototype.html#repo-utility"><i class="fa fa-check"></i><b>6.8</b> Utility</a></li>
+</ul></li>
+<li class="chapter" data-level="7" data-path="rest.html"><a href="rest.html"><i class="fa fa-check"></i><b>7</b> Data at Rest</a>
+<ul>
+<li class="chapter" data-level="7.1" data-path="rest.html"><a href="rest.html#data-states"><i class="fa fa-check"></i><b>7.1</b> Data States</a></li>
+<li class="chapter" data-level="7.2" data-path="rest.html"><a href="rest.html#data-containers"><i class="fa fa-check"></i><b>7.2</b> Data Containers</a>
+<ul>
+<li class="chapter" data-level="7.2.1" data-path="rest.html"><a href="rest.html#data-containers-csv"><i class="fa fa-check"></i><b>7.2.1</b> csv</a></li>
+<li class="chapter" data-level="7.2.2" data-path="rest.html"><a href="rest.html#data-containers-rds"><i class="fa fa-check"></i><b>7.2.2</b> rds</a></li>
+<li class="chapter" data-level="7.2.3" data-path="rest.html"><a href="rest.html#data-containers-yaml"><i class="fa fa-check"></i><b>7.2.3</b> yaml, json, and xml</a></li>
+<li class="chapter" data-level="7.2.4" data-path="rest.html"><a href="rest.html#data-containers-arrow"><i class="fa fa-check"></i><b>7.2.4</b> Arrow</a></li>
+<li class="chapter" data-level="7.2.5" data-path="rest.html"><a href="rest.html#data-containers-sqlite"><i class="fa fa-check"></i><b>7.2.5</b> SQLite</a></li>
+<li class="chapter" data-level="7.2.6" data-path="rest.html"><a href="rest.html#data-containers-database"><i class="fa fa-check"></i><b>7.2.6</b> Central Enterprise database</a></li>
+<li class="chapter" data-level="7.2.7" data-path="rest.html"><a href="rest.html#data-containers-redcap"><i class="fa fa-check"></i><b>7.2.7</b> Central REDCap database</a></li>
+<li class="chapter" data-level="7.2.8" data-path="rest.html"><a href="rest.html#data-containers-avoid"><i class="fa fa-check"></i><b>7.2.8</b> Containers to avoid</a></li>
+</ul></li>
+<li class="chapter" data-level="7.3" data-path="rest.html"><a href="rest.html#data-conventions"><i class="fa fa-check"></i><b>7.3</b> Storage Conventions</a>
+<ul>
+<li class="chapter" data-level="7.3.1" data-path="rest.html"><a href="rest.html#data-conventions-all"><i class="fa fa-check"></i><b>7.3.1</b> All Sources</a></li>
+<li class="chapter" data-level="7.3.2" data-path="rest.html"><a href="rest.html#data-conventions-text"><i class="fa fa-check"></i><b>7.3.2</b> Text</a></li>
+<li class="chapter" data-level="7.3.3" data-path="rest.html"><a href="rest.html#data-conventions-excel"><i class="fa fa-check"></i><b>7.3.3</b> Excel</a></li>
+<li class="chapter" data-level="7.3.4" data-path="rest.html"><a href="rest.html#data-conventions-meditech"><i class="fa fa-check"></i><b>7.3.4</b> Meditech</a></li>
+<li class="chapter" data-level="7.3.5" data-path="rest.html"><a href="rest.html#data-conventions-database"><i class="fa fa-check"></i><b>7.3.5</b> Databases</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="8" data-path="patterns.html"><a href="patterns.html"><i class="fa fa-check"></i><b>8</b> Patterns</a>
+<ul>
+<li class="chapter" data-level="8.1" data-path="patterns.html"><a href="patterns.html#pattern-ellis"><i class="fa fa-check"></i><b>8.1</b> Ellis</a>
+<ul>
+<li class="chapter" data-level="8.1.1" data-path="patterns.html"><a href="patterns.html#purpose"><i class="fa fa-check"></i><b>8.1.1</b> Purpose</a></li>
+<li class="chapter" data-level="8.1.2" data-path="patterns.html"><a href="patterns.html#philosophy"><i class="fa fa-check"></i><b>8.1.2</b> Philosophy</a></li>
+<li class="chapter" data-level="8.1.3" data-path="patterns.html"><a href="patterns.html#guidelines"><i class="fa fa-check"></i><b>8.1.3</b> Guidelines</a></li>
+<li class="chapter" data-level="8.1.4" data-path="patterns.html"><a href="patterns.html#examples"><i class="fa fa-check"></i><b>8.1.4</b> Examples</a></li>
+<li class="chapter" data-level="8.1.5" data-path="patterns.html"><a href="patterns.html#elements"><i class="fa fa-check"></i><b>8.1.5</b> Elements</a></li>
+</ul></li>
+<li class="chapter" data-level="8.2" data-path="patterns.html"><a href="patterns.html#pattern-arch"><i class="fa fa-check"></i><b>8.2</b> Arch</a></li>
+<li class="chapter" data-level="8.3" data-path="patterns.html"><a href="patterns.html#pattern-ferry"><i class="fa fa-check"></i><b>8.3</b> Ferry</a></li>
+<li class="chapter" data-level="8.4" data-path="patterns.html"><a href="patterns.html#pattern-scribe"><i class="fa fa-check"></i><b>8.4</b> Scribe</a></li>
+<li class="chapter" data-level="8.5" data-path="patterns.html"><a href="patterns.html#pattern-analysis"><i class="fa fa-check"></i><b>8.5</b> Analysis</a></li>
+<li class="chapter" data-level="8.6" data-path="patterns.html"><a href="patterns.html#pattern-presentation-static"><i class="fa fa-check"></i><b>8.6</b> Presentation -Static</a></li>
+<li class="chapter" data-level="8.7" data-path="patterns.html"><a href="patterns.html#pattern-presentation-interactive"><i class="fa fa-check"></i><b>8.7</b> Presentation -Interactive</a></li>
+<li class="chapter" data-level="8.8" data-path="patterns.html"><a href="patterns.html#pattern-metadata"><i class="fa fa-check"></i><b>8.8</b> Metadata</a>
+<ul>
+<li class="chapter" data-level="8.8.1" data-path="patterns.html"><a href="patterns.html#primary-rules-for-mapping"><i class="fa fa-check"></i><b>8.8.1</b> Primary Rules for Mapping</a></li>
+<li class="chapter" data-level="8.8.2" data-path="patterns.html"><a href="patterns.html#secondary-rules-for-mapping"><i class="fa fa-check"></i><b>8.8.2</b> Secondary Rules for Mapping</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="9" data-path="security.html"><a href="security.html"><i class="fa fa-check"></i><b>9</b> Security &amp; Private Data</a>
+<ul>
+<li class="chapter" data-level="9.1" data-path="security.html"><a href="security.html#security-guidelines"><i class="fa fa-check"></i><b>9.1</b> Security Guidelines</a></li>
+<li class="chapter" data-level="9.2" data-path="security.html"><a href="security.html#dataset-level-redaction"><i class="fa fa-check"></i><b>9.2</b> Dataset-level Redaction</a></li>
+<li class="chapter" data-level="9.3" data-path="security.html"><a href="security.html#security-for-data-at-rest"><i class="fa fa-check"></i><b>9.3</b> Security for Data at Rest</a></li>
+<li class="chapter" data-level="9.4" data-path="security.html"><a href="security.html#file-level-permissions"><i class="fa fa-check"></i><b>9.4</b> File-level permissions</a></li>
+<li class="chapter" data-level="9.5" data-path="security.html"><a href="security.html#database-permissions"><i class="fa fa-check"></i><b>9.5</b> Database permissions</a></li>
+<li class="chapter" data-level="9.6" data-path="security.html"><a href="security.html#public-private-repositories"><i class="fa fa-check"></i><b>9.6</b> Public &amp; Private Repositories</a>
+<ul>
+<li class="chapter" data-level="9.6.1" data-path="security.html"><a href="security.html#repo-rules"><i class="fa fa-check"></i><b>9.6.1</b> Repo Rules</a></li>
+<li class="chapter" data-level="9.6.2" data-path="security.html"><a href="security.html#scrubbing-github-history"><i class="fa fa-check"></i><b>9.6.2</b> Scrubbing GitHub history</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="10" data-path="automation.html"><a href="automation.html"><i class="fa fa-check"></i><b>10</b> Automation &amp; Reproducibility</a>
+<ul>
+<li class="chapter" data-level="10.1" data-path="automation.html"><a href="automation.html#automation-mediator"><i class="fa fa-check"></i><b>10.1</b> Mediator</a>
+<ul>
+<li class="chapter" data-level="10.1.1" data-path="automation.html"><a href="automation.html#automation-flow"><i class="fa fa-check"></i><b>10.1.1</b> Flow File in R</a></li>
+<li class="chapter" data-level="10.1.2" data-path="automation.html"><a href="automation.html#automation-makefile"><i class="fa fa-check"></i><b>10.1.2</b> Makefile</a></li>
+<li class="chapter" data-level="10.1.3" data-path="automation.html"><a href="automation.html#automation-ssis"><i class="fa fa-check"></i><b>10.1.3</b> SSIS</a></li>
+</ul></li>
+<li class="chapter" data-level="10.2" data-path="automation.html"><a href="automation.html#automation-scheduling"><i class="fa fa-check"></i><b>10.2</b> Scheduling</a>
+<ul>
+<li class="chapter" data-level="10.2.1" data-path="automation.html"><a href="automation.html#automation-cron"><i class="fa fa-check"></i><b>10.2.1</b> cron</a></li>
+<li class="chapter" data-level="10.2.2" data-path="automation.html"><a href="automation.html#automation-task-scheduler"><i class="fa fa-check"></i><b>10.2.2</b> Task Scheduler</a></li>
+<li class="chapter" data-level="10.2.3" data-path="automation.html"><a href="automation.html#automation-sql-server-agent"><i class="fa fa-check"></i><b>10.2.3</b> SQL Server Agent</a></li>
+</ul></li>
+<li class="chapter" data-level="10.3" data-path="automation.html"><a href="automation.html#auxiliary-issues"><i class="fa fa-check"></i><b>10.3</b> Auxiliary Issues</a>
+<ul>
+<li class="chapter" data-level="10.3.1" data-path="automation.html"><a href="automation.html#sink-log-files"><i class="fa fa-check"></i><b>10.3.1</b> Sink Log Files</a></li>
+<li class="chapter" data-level="10.3.2" data-path="automation.html"><a href="automation.html#package-versions"><i class="fa fa-check"></i><b>10.3.2</b> Package Versions</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="11" data-path="scaling-up.html"><a href="scaling-up.html"><i class="fa fa-check"></i><b>11</b> Scaling Up</a>
+<ul>
+<li class="chapter" data-level="11.1" data-path="scaling-up.html"><a href="scaling-up.html#data-storage"><i class="fa fa-check"></i><b>11.1</b> Data Storage</a></li>
+<li class="chapter" data-level="11.2" data-path="scaling-up.html"><a href="scaling-up.html#data-processing"><i class="fa fa-check"></i><b>11.2</b> Data Processing</a></li>
+</ul></li>
+<li class="chapter" data-level="12" data-path="collaboration.html"><a href="collaboration.html"><i class="fa fa-check"></i><b>12</b> Parallel Collaboration</a>
+<ul>
+<li class="chapter" data-level="12.1" data-path="collaboration.html"><a href="collaboration.html#social-contract"><i class="fa fa-check"></i><b>12.1</b> Social Contract</a></li>
+<li class="chapter" data-level="12.2" data-path="collaboration.html"><a href="collaboration.html#code-reviews"><i class="fa fa-check"></i><b>12.2</b> Code Reviews</a></li>
+<li class="chapter" data-level="12.3" data-path="collaboration.html"><a href="collaboration.html#remote"><i class="fa fa-check"></i><b>12.3</b> Remote</a></li>
+<li class="chapter" data-level="12.4" data-path="collaboration.html"><a href="collaboration.html#additional-resources-1"><i class="fa fa-check"></i><b>12.4</b> Additional Resources</a></li>
+<li class="chapter" data-level="12.5" data-path="collaboration.html"><a href="collaboration.html#loose-notes"><i class="fa fa-check"></i><b>12.5</b> Loose Notes</a>
+<ul>
+<li class="chapter" data-level="12.5.1" data-path="collaboration.html"><a href="collaboration.html#github"><i class="fa fa-check"></i><b>12.5.1</b> GitHub</a></li>
+<li class="chapter" data-level="12.5.2" data-path="collaboration.html"><a href="collaboration.html#common-code"><i class="fa fa-check"></i><b>12.5.2</b> Common Code</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="13" data-path="document.html"><a href="document.html"><i class="fa fa-check"></i><b>13</b> Documentation</a>
+<ul>
+<li class="chapter" data-level="13.1" data-path="document.html"><a href="document.html#team-wide"><i class="fa fa-check"></i><b>13.1</b> Team-wide</a></li>
+<li class="chapter" data-level="13.2" data-path="document.html"><a href="document.html#project-specific"><i class="fa fa-check"></i><b>13.2</b> Project-specific</a></li>
+<li class="chapter" data-level="13.3" data-path="document.html"><a href="document.html#dataset-origin-structure"><i class="fa fa-check"></i><b>13.3</b> Dataset Origin &amp; Structure</a></li>
+<li class="chapter" data-level="13.4" data-path="document.html"><a href="document.html#issues-tasks"><i class="fa fa-check"></i><b>13.4</b> Issues &amp; Tasks</a>
+<ul>
+<li class="chapter" data-level="13.4.1" data-path="document.html"><a href="document.html#documentation-issue-template"><i class="fa fa-check"></i><b>13.4.1</b> GitHub Issue Template</a></li>
+</ul></li>
+<li class="chapter" data-level="13.5" data-path="document.html"><a href="document.html#flow-diagrams"><i class="fa fa-check"></i><b>13.5</b> Flow Diagrams</a></li>
+<li class="chapter" data-level="13.6" data-path="document.html"><a href="document.html#setting-up-new-machine"><i class="fa fa-check"></i><b>13.6</b> Setting up new machine</a></li>
+</ul></li>
+<li class="chapter" data-level="14" data-path="style.html"><a href="style.html"><i class="fa fa-check"></i><b>14</b> Style Guide</a>
+<ul>
+<li class="chapter" data-level="14.1" data-path="style.html"><a href="style.html#readability"><i class="fa fa-check"></i><b>14.1</b> Readability</a>
+<ul>
+<li class="chapter" data-level="14.1.1" data-path="style.html"><a href="style.html#style-number"><i class="fa fa-check"></i><b>14.1.1</b> Number</a></li>
+<li class="chapter" data-level="14.1.2" data-path="style.html"><a href="style.html#style-abbreviation"><i class="fa fa-check"></i><b>14.1.2</b> Abbreviations</a></li>
+</ul></li>
+<li class="chapter" data-level="14.2" data-path="style.html"><a href="style.html#datasets"><i class="fa fa-check"></i><b>14.2</b> Datasets</a>
+<ul>
+<li class="chapter" data-level="14.2.1" data-path="style.html"><a href="style.html#style-filter"><i class="fa fa-check"></i><b>14.2.1</b> Filtering Rows</a></li>
+<li class="chapter" data-level="14.2.2" data-path="style.html"><a href="style.html#style-attach"><i class="fa fa-check"></i><b>14.2.2</b> Don’t attach</a></li>
+</ul></li>
+<li class="chapter" data-level="14.3" data-path="style.html"><a href="style.html#style-factor"><i class="fa fa-check"></i><b>14.3</b> Categorical Variables</a>
+<ul>
+<li class="chapter" data-level="14.3.1" data-path="style.html"><a href="style.html#style-factor-unknown"><i class="fa fa-check"></i><b>14.3.1</b> Explicit Missing Values</a></li>
+<li class="chapter" data-level="14.3.2" data-path="style.html"><a href="style.html#style-factor-granularity"><i class="fa fa-check"></i><b>14.3.2</b> Granularity</a></li>
+</ul></li>
+<li class="chapter" data-level="14.4" data-path="style.html"><a href="style.html#style-dates"><i class="fa fa-check"></i><b>14.4</b> Dates</a></li>
+<li class="chapter" data-level="14.5" data-path="style.html"><a href="style.html#naming"><i class="fa fa-check"></i><b>14.5</b> Naming</a>
+<ul>
+<li class="chapter" data-level="14.5.1" data-path="style.html"><a href="style.html#style-naming-variables"><i class="fa fa-check"></i><b>14.5.1</b> Variables</a></li>
+<li class="chapter" data-level="14.5.2" data-path="style.html"><a href="style.html#style-naming-files"><i class="fa fa-check"></i><b>14.5.2</b> Files and Folders</a></li>
+<li class="chapter" data-level="14.5.3" data-path="style.html"><a href="style.html#style-naming-datasets"><i class="fa fa-check"></i><b>14.5.3</b> Datasets</a></li>
+<li class="chapter" data-level="14.5.4" data-path="style.html"><a href="style.html#style-naming-semantic"><i class="fa fa-check"></i><b>14.5.4</b> Semantic sorting</a></li>
+</ul></li>
+<li class="chapter" data-level="14.6" data-path="style.html"><a href="style.html#style-whitespace"><i class="fa fa-check"></i><b>14.6</b> Whitespace</a></li>
+<li class="chapter" data-level="14.7" data-path="style.html"><a href="style.html#style-database"><i class="fa fa-check"></i><b>14.7</b> Database</a></li>
+<li class="chapter" data-level="14.8" data-path="style.html"><a href="style.html#style-ggplot"><i class="fa fa-check"></i><b>14.8</b> ggplot2</a>
+<ul>
+<li class="chapter" data-level="14.8.1" data-path="style.html"><a href="style.html#style-ggplot-order"><i class="fa fa-check"></i><b>14.8.1</b> Order of commands</a></li>
+<li class="chapter" data-level="14.8.2" data-path="style.html"><a href="style.html#style-ggplot-gotchas"><i class="fa fa-check"></i><b>14.8.2</b> Gotchas</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="15" data-path="publication.html"><a href="publication.html"><i class="fa fa-check"></i><b>15</b> Publishing Results</a>
+<ul>
+<li class="chapter" data-level="15.1" data-path="publication.html"><a href="publication.html#publication-analysts"><i class="fa fa-check"></i><b>15.1</b> To Other Analysts</a></li>
+<li class="chapter" data-level="15.2" data-path="publication.html"><a href="publication.html#publication-experts"><i class="fa fa-check"></i><b>15.2</b> To Researchers &amp; Content Experts</a></li>
+<li class="chapter" data-level="15.3" data-path="publication.html"><a href="publication.html#publication-phobic"><i class="fa fa-check"></i><b>15.3</b> To Technical-Phobic Audiences</a></li>
+</ul></li>
+<li class="chapter" data-level="16" data-path="testing-and-validation.html"><a href="testing-and-validation.html"><i class="fa fa-check"></i><b>16</b> Testing, Validation, &amp; Defensive Programming</a>
+<ul>
+<li class="chapter" data-level="16.1" data-path="testing-and-validation.html"><a href="testing-and-validation.html#testing-functions"><i class="fa fa-check"></i><b>16.1</b> Testing Functions</a></li>
+<li class="chapter" data-level="16.2" data-path="testing-and-validation.html"><a href="testing-and-validation.html#defensive-programming"><i class="fa fa-check"></i><b>16.2</b> Defensive Programming</a></li>
+<li class="chapter" data-level="16.3" data-path="testing-and-validation.html"><a href="testing-and-validation.html#validator"><i class="fa fa-check"></i><b>16.3</b> Validator</a></li>
+</ul></li>
+<li class="chapter" data-level="17" data-path="troubleshooting.html"><a href="troubleshooting.html"><i class="fa fa-check"></i><b>17</b> Troubleshooting and Debugging</a>
+<ul>
+<li class="chapter" data-level="17.1" data-path="troubleshooting.html"><a href="troubleshooting.html#finding-help"><i class="fa fa-check"></i><b>17.1</b> Finding Help</a></li>
+<li class="chapter" data-level="17.2" data-path="troubleshooting.html"><a href="troubleshooting.html#debugging"><i class="fa fa-check"></i><b>17.2</b> Debugging</a></li>
+</ul></li>
+<li class="chapter" data-level="18" data-path="workstation.html"><a href="workstation.html"><i class="fa fa-check"></i><b>18</b> Workstation</a>
+<ul>
+<li class="chapter" data-level="18.1" data-path="workstation.html"><a href="workstation.html#workstation-required"><i class="fa fa-check"></i><b>18.1</b> Required Installation</a>
+<ul>
+<li class="chapter" data-level="18.1.1" data-path="workstation.html"><a href="workstation.html#workstation-r"><i class="fa fa-check"></i><b>18.1.1</b> R</a></li>
+<li class="chapter" data-level="18.1.2" data-path="workstation.html"><a href="workstation.html#workstation-rstudio"><i class="fa fa-check"></i><b>18.1.2</b> RStudio</a></li>
+<li class="chapter" data-level="18.1.3" data-path="workstation.html"><a href="workstation.html#workstation-r-package-installation"><i class="fa fa-check"></i><b>18.1.3</b> Installing R Packages</a></li>
+<li class="chapter" data-level="18.1.4" data-path="workstation.html"><a href="workstation.html#workstation-r-package-update"><i class="fa fa-check"></i><b>18.1.4</b> Updating R Packages</a></li>
+<li class="chapter" data-level="18.1.5" data-path="workstation.html"><a href="workstation.html#workstation-github"><i class="fa fa-check"></i><b>18.1.5</b> GitHub</a></li>
+<li class="chapter" data-level="18.1.6" data-path="workstation.html"><a href="workstation.html#workstation-github-client"><i class="fa fa-check"></i><b>18.1.6</b> GitHub Desktop</a></li>
+<li class="chapter" data-level="18.1.7" data-path="workstation.html"><a href="workstation.html#workstation-rtools"><i class="fa fa-check"></i><b>18.1.7</b> R Tools</a></li>
+</ul></li>
+<li class="chapter" data-level="18.2" data-path="workstation.html"><a href="workstation.html#workstation-recommended"><i class="fa fa-check"></i><b>18.2</b> Recommended Installation</a>
+<ul>
+<li class="chapter" data-level="18.2.1" data-path="workstation.html"><a href="workstation.html#workstation-odbc"><i class="fa fa-check"></i><b>18.2.1</b> ODBC Driver</a></li>
+<li class="chapter" data-level="18.2.2" data-path="workstation.html"><a href="workstation.html#workstation-notepadpp"><i class="fa fa-check"></i><b>18.2.2</b> Notepad++</a></li>
+<li class="chapter" data-level="18.2.3" data-path="workstation.html"><a href="workstation.html#workstation-ads"><i class="fa fa-check"></i><b>18.2.3</b> Azure Data Studio</a></li>
+<li class="chapter" data-level="18.2.4" data-path="workstation.html"><a href="workstation.html#workstation-vscode"><i class="fa fa-check"></i><b>18.2.4</b> Visual Studio Code</a></li>
+</ul></li>
+<li class="chapter" data-level="18.3" data-path="workstation.html"><a href="workstation.html#workstation-optional"><i class="fa fa-check"></i><b>18.3</b> Optional Installation</a>
+<ul>
+<li class="chapter" data-level="18.3.1" data-path="workstation.html"><a href="workstation.html#workstation-git"><i class="fa fa-check"></i><b>18.3.1</b> Git</a></li>
+<li class="chapter" data-level="18.3.2" data-path="workstation.html"><a href="workstation.html#workstation-calc"><i class="fa fa-check"></i><b>18.3.2</b> LibreOffice Calc</a></li>
+<li class="chapter" data-level="18.3.3" data-path="workstation.html"><a href="workstation.html#workstation-pandoc"><i class="fa fa-check"></i><b>18.3.3</b> pandoc</a></li>
+<li class="chapter" data-level="18.3.4" data-path="workstation.html"><a href="workstation.html#workstation-python"><i class="fa fa-check"></i><b>18.3.4</b> Python</a></li>
+</ul></li>
+<li class="chapter" data-level="18.4" data-path="workstation.html"><a href="workstation.html#workstation-assets"><i class="fa fa-check"></i><b>18.4</b> Asset Locations</a></li>
+<li class="chapter" data-level="18.5" data-path="workstation.html"><a href="workstation.html#workstation-administrator"><i class="fa fa-check"></i><b>18.5</b> Administrator Installation</a>
+<ul>
+<li class="chapter" data-level="18.5.1" data-path="workstation.html"><a href="workstation.html#workstation-mysql"><i class="fa fa-check"></i><b>18.5.1</b> MySQL Workbench</a></li>
+<li class="chapter" data-level="18.5.2" data-path="workstation.html"><a href="workstation.html#workstation-postman"><i class="fa fa-check"></i><b>18.5.2</b> Postman</a></li>
+<li class="chapter" data-level="18.5.3" data-path="workstation.html"><a href="workstation.html#workstation-ssms"><i class="fa fa-check"></i><b>18.5.3</b> SQL Server Management Studio (SSMS)</a></li>
+<li class="chapter" data-level="18.5.4" data-path="workstation.html"><a href="workstation.html#workstation-winscp"><i class="fa fa-check"></i><b>18.5.4</b> WinSCP</a></li>
+</ul></li>
+<li class="chapter" data-level="18.6" data-path="workstation.html"><a href="workstation.html#workstation-troubleshooting"><i class="fa fa-check"></i><b>18.6</b> Installation Troubleshooting</a></li>
+<li class="chapter" data-level="18.7" data-path="workstation.html"><a href="workstation.html#workstation-ubuntu"><i class="fa fa-check"></i><b>18.7</b> Ubuntu Installation</a></li>
+<li class="chapter" data-level="18.8" data-path="workstation.html"><a href="workstation.html#workstation-retired"><i class="fa fa-check"></i><b>18.8</b> Retired Tools</a></li>
+</ul></li>
+<li class="chapter" data-level="19" data-path="tools.html"><a href="tools.html"><i class="fa fa-check"></i><b>19</b> Considerations when Selecting Tools</a>
+<ul>
+<li class="chapter" data-level="19.1" data-path="tools.html"><a href="tools.html#general"><i class="fa fa-check"></i><b>19.1</b> General</a>
+<ul>
+<li class="chapter" data-level="19.1.1" data-path="tools.html"><a href="tools.html#the-components-goal"><i class="fa fa-check"></i><b>19.1.1</b> The Component’s Goal</a></li>
+<li class="chapter" data-level="19.1.2" data-path="tools.html"><a href="tools.html#current-skillset-of-team"><i class="fa fa-check"></i><b>19.1.2</b> Current Skillset of Team</a></li>
+<li class="chapter" data-level="19.1.3" data-path="tools.html"><a href="tools.html#desired-future-skillset-of-team"><i class="fa fa-check"></i><b>19.1.3</b> Desired Future Skillset of Team</a></li>
+<li class="chapter" data-level="19.1.4" data-path="tools.html"><a href="tools.html#skillset-of-audience"><i class="fa fa-check"></i><b>19.1.4</b> Skillset of Audience</a></li>
+</ul></li>
+<li class="chapter" data-level="19.2" data-path="tools.html"><a href="tools.html#languages"><i class="fa fa-check"></i><b>19.2</b> Languages</a></li>
+<li class="chapter" data-level="19.3" data-path="tools.html"><a href="tools.html#r-packages"><i class="fa fa-check"></i><b>19.3</b> R Packages</a></li>
+<li class="chapter" data-level="19.4" data-path="tools.html"><a href="tools.html#database"><i class="fa fa-check"></i><b>19.4</b> Database</a></li>
+<li class="chapter" data-level="19.5" data-path="tools.html"><a href="tools.html#additional-resources-2"><i class="fa fa-check"></i><b>19.5</b> Additional Resources</a></li>
+</ul></li>
+<li class="chapter" data-level="20" data-path="team.html"><a href="team.html"><i class="fa fa-check"></i><b>20</b> Growing a Team</a>
+<ul>
+<li class="chapter" data-level="20.1" data-path="team.html"><a href="team.html#recruiting"><i class="fa fa-check"></i><b>20.1</b> Recruiting</a></li>
+<li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
+<li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
+</ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
+<li class="appendix"><span><b>Appendix</b></span></li>
+<li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
+<ul>
+<li class="chapter" data-level="A.1" data-path="git.html"><a href="git.html#git-code"><i class="fa fa-check"></i><b>A.1</b> for Code Development</a></li>
+<li class="chapter" data-level="A.2" data-path="git.html"><a href="git.html#git-collaboration"><i class="fa fa-check"></i><b>A.2</b> for Collaboration</a></li>
+<li class="chapter" data-level="A.3" data-path="git.html"><a href="git.html#git-stability"><i class="fa fa-check"></i><b>A.3</b> for Stability</a></li>
+<li class="chapter" data-level="A.4" data-path="git.html"><a href="git.html#git-collaborators"><i class="fa fa-check"></i><b>A.4</b> for New Collaborators</a></li>
+<li class="chapter" data-level="A.5" data-path="git.html"><a href="git.html#git-contribution"><i class="fa fa-check"></i><b>A.5</b> Steps for Contributing to Repo</a>
+<ul>
+<li class="chapter" data-level="A.5.1" data-path="git.html"><a href="git.html#git-contribution-regular"><i class="fa fa-check"></i><b>A.5.1</b> Regular Contributions</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="B" data-path="snippets.html"><a href="snippets.html"><i class="fa fa-check"></i><b>B</b> Snippets</a>
+<ul>
+<li class="chapter" data-level="B.1" data-path="snippets.html"><a href="snippets.html#snippets-reading"><i class="fa fa-check"></i><b>B.1</b> Reading External Data</a>
+<ul>
+<li class="chapter" data-level="B.1.1" data-path="snippets.html"><a href="snippets.html#snippets-reading-excel"><i class="fa fa-check"></i><b>B.1.1</b> Reading from Excel</a></li>
+<li class="chapter" data-level="B.1.2" data-path="snippets.html"><a href="snippets.html#snippets-reading-trailing-comma"><i class="fa fa-check"></i><b>B.1.2</b> Removing Trailing Comma from Header</a></li>
+<li class="chapter" data-level="B.1.3" data-path="snippets.html"><a href="snippets.html#snippets-reading-vroom"><i class="fa fa-check"></i><b>B.1.3</b> Removing Trailing Comma from Header</a></li>
+</ul></li>
+<li class="chapter" data-level="B.2" data-path="snippets.html"><a href="snippets.html#snippets-grooming"><i class="fa fa-check"></i><b>B.2</b> Grooming</a>
+<ul>
+<li class="chapter" data-level="B.2.1" data-path="snippets.html"><a href="snippets.html#snippets-grooming-two-year"><i class="fa fa-check"></i><b>B.2.1</b> Correct for misinterpreted two-digit year</a></li>
+</ul></li>
+<li class="chapter" data-level="B.3" data-path="snippets.html"><a href="snippets.html#snippets-identification"><i class="fa fa-check"></i><b>B.3</b> Identification</a>
+<ul>
+<li class="chapter" data-level="B.3.1" data-path="snippets.html"><a href="snippets.html#snippets-identification-tags"><i class="fa fa-check"></i><b>B.3.1</b> Generating “tags”</a></li>
+</ul></li>
+<li class="chapter" data-level="B.4" data-path="snippets.html"><a href="snippets.html#snippets-correspondence"><i class="fa fa-check"></i><b>B.4</b> Correspondence with Collaborators</a>
+<ul>
+<li class="chapter" data-level="B.4.1" data-path="snippets.html"><a href="snippets.html#snippets-correspondence-excel"><i class="fa fa-check"></i><b>B.4.1</b> Excel files</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="C" data-path="presentations.html"><a href="presentations.html"><i class="fa fa-check"></i><b>C</b> Presentations</a>
+<ul>
+<li class="chapter" data-level="C.1" data-path="presentations.html"><a href="presentations.html#cdw"><i class="fa fa-check"></i><b>C.1</b> CDW</a></li>
+<li class="chapter" data-level="C.2" data-path="presentations.html"><a href="presentations.html#redcap"><i class="fa fa-check"></i><b>C.2</b> REDCap</a></li>
+<li class="chapter" data-level="C.3" data-path="presentations.html"><a href="presentations.html#reproducible-research-visualization"><i class="fa fa-check"></i><b>C.3</b> Reproducible Research &amp; Visualization</a></li>
+<li class="chapter" data-level="C.4" data-path="presentations.html"><a href="presentations.html#data-management"><i class="fa fa-check"></i><b>C.4</b> Data Management</a></li>
+<li class="chapter" data-level="C.5" data-path="presentations.html"><a href="presentations.html#github-1"><i class="fa fa-check"></i><b>C.5</b> GitHub</a></li>
+<li class="chapter" data-level="C.6" data-path="presentations.html"><a href="presentations.html#software"><i class="fa fa-check"></i><b>C.6</b> Software</a></li>
+<li class="chapter" data-level="C.7" data-path="presentations.html"><a href="presentations.html#architectures"><i class="fa fa-check"></i><b>C.7</b> Architectures</a></li>
+<li class="chapter" data-level="C.8" data-path="presentations.html"><a href="presentations.html#components"><i class="fa fa-check"></i><b>C.8</b> Components</a></li>
+</ul></li>
+<li class="chapter" data-level="D" data-path="scratch-pad.html"><a href="scratch-pad.html"><i class="fa fa-check"></i><b>D</b> Scratch Pad of Loose Ideas</a>
+<ul>
+<li class="chapter" data-level="D.1" data-path="scratch-pad.html"><a href="scratch-pad.html#chapters-sections-to-form"><i class="fa fa-check"></i><b>D.1</b> Chapters &amp; Sections to Form</a></li>
+<li class="chapter" data-level="D.2" data-path="scratch-pad.html"><a href="scratch-pad.html#practices"><i class="fa fa-check"></i><b>D.2</b> Practices</a></li>
+<li class="chapter" data-level="D.3" data-path="scratch-pad.html"><a href="scratch-pad.html#good-sites"><i class="fa fa-check"></i><b>D.3</b> Good Sites</a></li>
+</ul></li>
+<li class="chapter" data-level="E" data-path="example-dashboard.html"><a href="example-dashboard.html"><i class="fa fa-check"></i><b>E</b> Example Dashboard</a>
+<ul>
+<li class="chapter" data-level="E.1" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-example"><i class="fa fa-check"></i><b>E.1</b> Example</a></li>
+<li class="chapter" data-level="E.2" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-guide"><i class="fa fa-check"></i><b>E.2</b> Style Guide</a>
+<ul>
+<li class="chapter" data-level="E.2.1" data-path="example-dashboard.html"><a href="example-dashboard.html#headline-page"><i class="fa fa-check"></i><b>E.2.1</b> Headline page</a></li>
+<li class="chapter" data-level="E.2.2" data-path="example-dashboard.html"><a href="example-dashboard.html#tables-page"><i class="fa fa-check"></i><b>E.2.2</b> Tables page</a></li>
+<li class="chapter" data-level="E.2.3" data-path="example-dashboard.html"><a href="example-dashboard.html#graphs-page"><i class="fa fa-check"></i><b>E.2.3</b> Graphs page</a></li>
+<li class="chapter" data-level="E.2.4" data-path="example-dashboard.html"><a href="example-dashboard.html#marginal-graphs-page"><i class="fa fa-check"></i><b>E.2.4</b> Marginal Graphs page</a></li>
+<li class="chapter" data-level="E.2.5" data-path="example-dashboard.html"><a href="example-dashboard.html#documentation-page"><i class="fa fa-check"></i><b>E.2.5</b> Documentation page</a></li>
+<li class="chapter" data-level="E.2.6" data-path="example-dashboard.html"><a href="example-dashboard.html#miscellaneous-notes"><i class="fa fa-check"></i><b>E.2.6</b> Miscellaneous Notes</a></li>
+</ul></li>
+<li class="chapter" data-level="E.3" data-path="example-dashboard.html"><a href="example-dashboard.html#example-dashboard-architecture"><i class="fa fa-check"></i><b>E.3</b> Architecture</a>
+<ul>
+<li class="chapter" data-level="E.3.1" data-path="example-dashboard.html"><a href="example-dashboard.html#data-from-external-system"><i class="fa fa-check"></i><b>E.3.1</b> Data from External System</a></li>
+<li class="chapter" data-level="E.3.2" data-path="example-dashboard.html"><a href="example-dashboard.html#groomed-data-in-warehouse"><i class="fa fa-check"></i><b>E.3.2</b> Groomed Data in Warehouse</a></li>
+<li class="chapter" data-level="E.3.3" data-path="example-dashboard.html"><a href="example-dashboard.html#analysis-ready-dataset"><i class="fa fa-check"></i><b>E.3.3</b> Analysis-Ready Dataset</a></li>
+</ul></li>
+</ul></li>
+<li class="chapter" data-level="F" data-path="example-chapter.html"><a href="example-chapter.html"><i class="fa fa-check"></i><b>F</b> Example Chapter</a></li>
+<li class="chapter" data-level="G" data-path="acknowledgements.html"><a href="acknowledgements.html"><i class="fa fa-check"></i><b>G</b> Acknowledgements</a></li>
+<li class="chapter" data-level="H" data-path="references.html"><a href="references.html"><i class="fa fa-check"></i><b>H</b> References</a></li>
+<li class="divider"></li>
+<li><a href="https://github.com/rstudio/bookdown" target="_blank" rel="noopener">Published with bookdown</a></li>
+
+</ul>
+
+      </nav>
+    </div>
+
+    <div class="book-body">
+      <div class="body-inner">
+        <div class="book-header" role="navigation">
+          <h1>
+            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Collaborative Data Science Practices</a>
+          </h1>
+        </div>
+
+        <div class="page-wrapper" tabindex="-1" role="main">
+          <div class="page-inner">
+
+            <section class="normal" id="section-">
+<div id="redcap-user" class="section level1" number="21">
+<h1><span class="header-section-number">Chapter 21</span> Material for REDCap Users</h1>
+<div id="redcap-user-login" class="section level2" number="21.1">
+<h2><span class="header-section-number">21.1</span> Login</h2>
+</div>
+<div id="redcap-user-report-develop" class="section level2" number="21.2">
+<h2><span class="header-section-number">21.2</span> Developing Reports</h2>
+<p>Please first read <a href="redcap-user.html#redcap-user-login">Login</a>.</p>
+
+</div>
+</div>
+            </section>
+
+          </div>
+        </div>
+      </div>
+<a href="team.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
+<a href="redcap-developer.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
+    </div>
+  </div>
+<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
+<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
+<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
+<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
+<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
+<script>
+gitbook.require(["gitbook"], function(gitbook) {
+gitbook.start({
+"sharing": {
+"github": false,
+"facebook": true,
+"twitter": true,
+"linkedin": false,
+"weibo": false,
+"instapaper": false,
+"vk": false,
+"whatsapp": false,
+"all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
+},
+"fontsettings": {
+"theme": "white",
+"family": "sans",
+"size": 2
+},
+"edit": {
+"link": "https://github.com/ouhscbbmc/data-science-practices-1/edit/master/ch-redcap-user.md",
+"text": "Edit"
+},
+"history": {
+"link": null,
+"text": null
+},
+"view": {
+"link": null,
+"text": null
+},
+"download": ["data-science-practices-1.pdf", "data-science-practices-1.epub"],
+"toc": {
+"collapse": "subsection"
+}
+});
+});
+</script>
+
+</body>
+
+</html>
diff --git a/docs/reference-keys.txt b/docs/reference-keys.txt
index 2a9323b..b6d1ab9 100644
--- a/docs/reference-keys.txt
+++ b/docs/reference-keys.txt
@@ -223,6 +223,11 @@ team
 recruiting
 training-to-data-science
 bridges-outside-the-team
+redcap-user
+redcap-user-login
+redcap-user-report-develop
+redcap-developer
+redcap-admin
 git
 git-code
 git-collaboration
diff --git a/docs/references.html b/docs/references.html
index ab6df4d..d708f80 100644
--- a/docs/references.html
+++ b/docs/references.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,7 +555,7 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="references" class="section level1" number="28">
+<div id="references" class="section level1" number="31">
 <h1><span class="header-section-number">H</span> References</h1>
 
 <div id="refs" class="references csl-bib-body hanging-indent">
diff --git a/docs/repo-prototype.html b/docs/repo-prototype.html
index 9780544..1be6172 100644
--- a/docs/repo-prototype.html
+++ b/docs/repo-prototype.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/rest.html b/docs/rest.html
index 5b21fef..56a8369 100644
--- a/docs/rest.html
+++ b/docs/rest.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/scaling-up.html b/docs/scaling-up.html
index 9b0da99..3d74ae1 100644
--- a/docs/scaling-up.html
+++ b/docs/scaling-up.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/scratch-pad.html b/docs/scratch-pad.html
index 011c1c8..99d5885 100644
--- a/docs/scratch-pad.html
+++ b/docs/scratch-pad.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,9 +555,9 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="scratch-pad" class="section level1" number="24">
+<div id="scratch-pad" class="section level1" number="27">
 <h1><span class="header-section-number">D</span> Scratch Pad of Loose Ideas</h1>
-<div id="chapters-sections-to-form" class="section level2" number="24.1">
+<div id="chapters-sections-to-form" class="section level2" number="27.1">
 <h2><span class="header-section-number">D.1</span> Chapters &amp; Sections to Form</h2>
 <ol style="list-style-type: decimal">
 <li><p>Tools to Consider</p>
@@ -595,13 +602,13 @@ <h2><span class="header-section-number">D.1</span> Chapters &amp; Sections to Fo
 <p>Your team should decide which elements of <a href="https://ouhscbbmc.github.io/data-science-practices-1/file-prototype-r.html">a file prototype</a> and <a href="https://ouhscbbmc.github.io/data-science-practices-1/repo-prototype.html">repo prototype</a> are best for you.</p></li>
 </ol>
 </div>
-<div id="practices" class="section level2" number="24.2">
+<div id="practices" class="section level2" number="27.2">
 <h2><span class="header-section-number">D.2</span> Practices</h2>
 <ul>
 <li><code>on.exit()</code> should have <code>add = TRUE</code> (<span class="citation"><a href="#ref-wickham-advanced-r" role="doc-biblioref">Wickham</a> (<a href="#ref-wickham-advanced-r" role="doc-biblioref">2019</a>)</span>, <a href="https://adv-r.hadley.nz/functions.html#on-exit">Exit handlers</a>).</li>
 </ul>
 </div>
-<div id="good-sites" class="section level2" number="24.3">
+<div id="good-sites" class="section level2" number="27.3">
 <h2><span class="header-section-number">D.3</span> Good Sites</h2>
 <p>Posts on these sites are almost always worth your time reading. The frequently improve how you develop with the common components used in our data pipelines.</p>
 <ul>
diff --git a/docs/search_index.json b/docs/search_index.json
index ca441a1..e0879db 100644
--- a/docs/search_index.json
+++ b/docs/search_index.json
@@ -1 +1 @@
-[["index.html", "Collaborative Data Science Practices Chapter 1 Introduction", " Collaborative Data Science Practices Will Beasley 2021-05-20 Chapter 1 Introduction This collection of documents describe practices used by the OUHSC BBMC in our analytics projects. "],["coding.html", "Chapter 2 Coding Principles 2.1 Simplify 2.2 Defensive Style", " Chapter 2 Coding Principles 2.1 Simplify 2.1.1 Data Types Use the simplest data type reasonable. A simpler data type is less likely contain unintended values. As we have seen, a string variable called gender can simultaneously contain the values m, f, F, Female, MALE, 0, 1, 2, Latino, \"\", and NA. On the other hand, a boolean variable gender_male can be only FALSE, TRUE, and NA.1 SQLite does not have a dedicated datatype, so you must resort to storing it as 0, 1 and NULL values. Because a caller cant assume that an ostensible boolean SQLite variable contains only those three values, the variable should be checked.] Once you have cleaned a variable in your initial ETL files (like an Ellis), lock it down so you do not have to spend time in the downstream files verifying that no bad values have been introduced. As a small bonus, simpler data types are typically faster, consume less memory, and translate more cleanly across platforms. Within R, the preference for numeric-ish variables is logical/boolean/bit, integer, bit64::integer64, and numeric/double-precision floats. The preference for categorical variables is logical/boolean/bit, factor, and character. 2.1.2 Categorical Levels When a boolean variable would be too restrictive and a factor or character is required, choose the simplest representation. Where possible: Use only lower case (e.g., male instead of Male for the gender variable). avoid repeating the variable in the level (e.g., control instead of control condition for the condition variable). 2.1.3 Recoding Almost every project recodes many variables. Choose the simplest function possible. 
The functions at the top are much easier to read and harder to mess up. Leverage existing booleans: Suppose you have the logical variable gender_male (which can be only TRUE, FALSE, or NA). Writing gender_male == TRUE or gender_male == FALSE will evaluate to a boolean thats unnecessary because gender_male is already a boolean. Testing for TRUE: use the variable by itself (i.e., gender_male instead of gender_male == TRUE). Testing for FALSE: use !. Write !gender_male instead of gender_male == FALSE or gender_male != TRUE. dplyr::coalesce(): The function evaluates a single variable and replaces NA with values from another variable. A coalesce like visit_completed = dplyr::coalesce(visit_completed, FALSE) is much easier to read and not mess up than visit_completed = dplyr::if_else(!is.na(visit_completed), visit_completed, FALSE) dplyr::na_if() transforms a nonmissing value into an NA. Recoding missing values like birth_apgar = dplyr::na_if(birth_apgar, 99) is easier to read and not mess up than birth_apgar = dplyr::if_else(birth_apgar == 99, NA_real_, birth_apgar) &lt;= (or a similar comparison operator): Compare two quantities to output a boolean variable. dplyr::if_else(): The function evaluates a single boolean variable. The output branches to only three possibilities: condition is (a) true, (b) false, or (c) (optionally) NA. An advantage over &lt;= is that NA values can be specified directly. date_start &lt;- as.Date(&quot;2017-01-01&quot;) # If a missing month element needs to be handled explicitly. stage = dplyr::if_else(date_start &lt;= month, &quot;pre&quot;, &quot;post&quot;, missing = &quot;missing-month&quot;) # Otherwise a simple boolean output is sufficient. stage_post = (date_start &lt;= month) base::cut(): The function evaluations only a single numeric variable. Its range is cut into different segments/categories on the one-dimensional number line. The output branches to single discrete value (either a factor-level or an integer). 
dplyr::recode(): The function evaluates a single integer or character variable. The output branches to a single discrete value. lookup table: It feasible recode 6 levels of race directly in R. Its less feasible to recode 200 provider names. Specify the mapping in a csv, readr the csv to a data.frame, and left-join it. dplyr::case_when(): The function is the most complicated because it can evaluate multiple variables. Also, multiple cases can be true, but only the first output is returned. This water fall execution helps in complicated scenarios, but is overkill for most. 2.2 Defensive Style 2.2.1 Qualify functions Try to prepend each function with its package. Write dplyr::filter() instead of filter(). When two packages contain public functions with the same name, the package that was most recently called with library() takes precedent. When multiple R files are executed, the packages precedents may not be predictable. Specifying the package eliminates the ambiguity, while also making the code easier to follow. For this reason, we recommend that almost all R files contain a load-packages chunk. See the Google Style Guide for more about qualifying functions. Some exceptions exist, including: The sf package if youre using its objects with dplyr verbs. 2.2.2 Date Arithmetic Dont use the minus operator (i.e., -) to subtract dates. Instead use as.integer(difftime(stop, start, units=\"days\")). Its longer but protects from the scenario that start or stop are changed upstream to a datetime. In that case, stop - start equals the number of seconds between the two points, not the number of days. 2.2.3 Excluding Bad Cases Some variables are critical to the record, and if its missing, you dont want or trust any of its other values. For instance, a hospital visit record rarely useful if missing the patient ID. In these cases, prevent the record from passing through the ellis. In this example, well presume we cant trust a patient record if it lacks a clean date of birth (dob). 
Define the permissible range, in either the elliss declare-globals chunk, or in the config-file. (Well use the config file for this example.) Well exclude anyone born before 2000, or after tomorrow. Even though its illogical for someone in a retrospective record to be born tomorrow, consider bending a little for small errors. range_dob : !expr c(as.Date(&quot;2000-01-01&quot;), Sys.Date() + lubridate::days(1)) In the tweak-data chunk, use OuhscMunge::trim_date() to set the cell to NA if it falls outside an acceptable range. After dplyr::mutate(), call tidyr::drop_na() to exclude the entire record, regardless if (a) it was already NA, or (b) was trimmed to NA. ds &lt;- ds %&gt;% dplyr::mutate( dob = OuhscMunge::trim_date(dob, config$range_dob) ) %&gt;% tidyr::drop_na(dob) Near the end of the file, verify the variable for three reasons: (a) theres a chance that the code above isnt working as expected, (b) some later code later might have introduced bad values, and (c) it clearly documents to a reader that dob was included in this range at this stage of the pipeline. checkmate::assert_date(ds$dob, any.missing=F, lower=config$range_dob[1], upper=config$range_dob[2]) The equivalent of Rs logical data type is called a bit in SQL Server, and a boolean in Postgres and MySQL. 
"],["architecture.html", "Chapter 3 Architecture Principles 3.1 Encapsulation 3.2 Leverage team members strengths &amp; avoid weaknesses 3.3 Scales 3.4 Consistency", " Chapter 3 Architecture Principles 3.1 Encapsulation 3.2 Leverage team members strengths &amp; avoid weaknesses 3.2.1 Focused code files 3.2.2 Metadata for content experts 3.3 Scales 3.3.1 Single source &amp; single analysis 3.3.2 Multiple sources &amp; multiple analyses 3.4 Consistency 3.4.1 Across Files 3.4.2 Across Languages 3.4.3 Across Projects "],["file-prototype-r.html", "Chapter 4 Prototypical R File 4.1 Clear Memory 4.2 Load Sources 4.3 Load Packages 4.4 Declare Globals 4.5 Load Data 4.6 Tweak Data 4.7 (Unique Content) 4.8 Verify Values 4.9 Specify Output Columns 4.10 Save to Disk or Database 4.11 Additional Resources", " Chapter 4 Prototypical R File As stated in Consistency across Files, using a consistent file structure can (a) improve the quality of the code because the structure has been proven over time to facilitate good practices and (b) allow your intentions to be more clear to teammates because they are familiar with the order and intentions of the chunks. We use the term chunk for a section of code because it corresponds with knitr terminology (Xie 2015), and in many analysis files (as opposed to manipulation files), the chunk of our R file connects to a knitr Rmd file. 4.1 Clear Memory Before the initial chunk many of our files clear the memory of variables from previous run. This is important when developing and debugging because it prevents previous runs from contaminating subsequent runs. However it has little effect during production; well look at manipulation files separately from analysis files. Manipulation R files are sourced with the argument local=new.env(). The file is executed in a fresh environment, so there are no variables to clear. 
Analysis R files are typically called from an Rmd files knitr::read_chunk(), and code positioned above the first chunk is not called by knitr.2 However typically do not clear the memory in R files that are sourced in the same environment as the caller, as it will interfere with the callers variables. rm(list = ls(all.names = TRUE)) 4.2 Load Sources In the first true chunk, source any R files containing global variables and functions that the current file requires. For instance, when a team of statisticians is producing a large report containing many analysis files, we define many of the graphical elements in a single file. This sourced file defines common color palettes and graphical functions so the cosmetics are more uniform across analyses. We prefer not to have sourced files perform any real action, such as importing data or manipulating a file. One reason is because it is difficult to be consistent about the environmental variables when the sourced files functions are run. A second reason is that it more cognitively difficult to understand how the files are connected. When the sourced file contains only function definitions, these operations can be called at any time in the current file with much tighter control of which variables are modified. A bonus of the discipline of defining functions (instead of executing functions) is that the operations are typically more robust and generalizable. Keep the chunk even if no files are sourced. An empty chunk is instructive to readers trying to determine if any files are sourced. This applies recommendation applies to all the chunks discussed in this chapter. As always, your team should agree on its own set of standards. # ---- load-sources ------------------------------------------------------------ base::source(file=&quot;./analysis/common/display-1.R&quot;) # Load common graphing functions. 4.3 Load Packages The load-packages chunk declares required packages near the files beginning for three reasons. 
First, a reader scanning the file can quickly determine its dependencies when located in a single chunk. Second, if your machine is lacking a required package, it is best to know early3. Third, this style mimics a requirement of other languages (such as declaring headers at the top of a C++ file) and follows the tidyverse style guide. As discussed in the previous qualify all functions section, we recommend that functions are qualified with their package (e.g., foo::bar() instead of merely bar()). Consequently, the load-packages chunk calls requireNamespace() more frequently than library(). requireNamespace() verifies the package is available on the local machine, but does not load it into memory; library() verifies the package is available, and then loads it. requireNamespace() is not used in several scenarios. Core packages (e.g., base and stats) are loaded by R in most default installations. We avoid unnecessary calls like library(stats) because they distract from more important features. Obvious dependencies are not called by requireNamespace() or library() for similar reasons, especially if they are not called directly. For example tidyselect is not listed when tidyr is listed. The pipe function (declared in the magrittr package , i.e., %&gt;%) is attached with import::from(magrittr, \"%&gt;%\"). This frequently-used function called be called throughout the execution without qualification. Compared to manipulation files, our analysis files tend to use many functions in a few concentrated packages so conflicting function names are less common. Typical packages used in analysis are ggplot2 and lme4. The sourced files above may load their own packages (by calling library()). It is important that the library() calls in this file follow the load-sources chunk so that identically-named functions (in different packages) are called with the correct precedent. Otherwise identically-named functions will conflict in the namespace with hard-to-predict results. 
Read R Packages for more about library(), requireNamespace(), and their siblings, as well as the larger concepts such as attaching functions into the search path. Here are packages found in most of our manipulation files. Notice the lesser-known packages have a quick explanation; this helps maintainers decide if the declaration is still necessary. Also notice the packages distributed outside of CRAN (e.g., GitHub) have a quick commented line to help the user install or update the package. # ---- load-packages ----------------------------------------------------------- import::from(magrittr, &quot;%&gt;%&quot; ) requireNamespace(&quot;readr&quot; ) requireNamespace(&quot;tidyr&quot; ) requireNamespace(&quot;dplyr&quot; ) requireNamespace(&quot;config&quot; ) requireNamespace(&quot;checkmate&quot; ) # Asserts expected conditions requireNamespace(&quot;OuhscMunge&quot;) # remotes::install_github(repo=&quot;OuhscBbmc/OuhscMunge&quot;) 4.4 Declare Globals When values are repeatedly used within a file, consider dedicating a variable so its defined and set only once. This is also a good place for variables that are used only once, but whose value are central to the files mission. Typical variables in our declare-globals chunk include data file paths, data file variables, color palettes, and values in the config file. The config file can coordinate a static variable across multiple files. Centrally # ---- declare-globals --------------------------------------------------------- # Constant values that won&#39;t change. config &lt;- config::get() path_db &lt;- config$path_database # Execute to specify the column types. It might require some manual adjustment (eg doubles to integers). 
# OuhscMunge::readr_spec_aligned(config$path_subject_1_raw) col_types &lt;- readr::cols_only( subject_id = readr::col_integer(), county_id = readr::col_integer(), gender_id = readr::col_double(), race = readr::col_character(), ethnicity = readr::col_character() ) 4.5 Load Data All data ingested by this file occurs in this chunk. We like to think of each file as a linear pipe with a single point of input and single point of output. Although it is possible for a file to read data files on any line, we recommend avoiding this sprawl because it is more difficult for humans to understand. If the software developer is a deist watchmaker, the files fate has been sealed by the end of this chunk. This makes is easier for a human to reason to isolate problems as either existing with (a) the incoming data or (b) the calculations on that data. Ideally this chunk consumes data from either a plain-text csv or a database. Many capable R functions and packages ingest data. We prefer the tidyverse readr for reading conventional files; its younger cousin, vroom has some nice advantages when working with larger files and some forms of jagged rectangles4. Depending on the file format, good packages to consider are data.table, haven, readxl, openxlsx, arrow, jsonlite, fst, yaml, and rio. When used in an Ellis, this chunk likely consumes a flat file like a csv with data or metadata. When used in a Ferry, Arch, or Scribe, this chunk likely consumes a database table. When used in an Analysis file, this chunk likely consumes a database table or rds (i.e., a compressed R data file). In some large-scale scenarios, there may be a series of datasets that cannot be held in RAM simultaneously. Our first choice is to split the R file so each new file has only a subset of the datasets in other words, the R file probably was given too much responsibility. Occassionaly the multiple datasets need to be considered at once, so splitting the R file is not a option. 
In these scenarios, we prefer to upload all the datasets to a database, which is better manipulating datasets too large for RAM. An R solution may be to loosen the restriction that dataset enter the R file only during the load-data chunk. Once a dataset is processed and no longer needed, rm() removes it from RAM. Now another dataset can be read from a file and manipulated. loose scrap: the chunk reads all data (e.g., database table, networked CSV, local lookup table). After this chunk, no new data should be introduced. This is for the sake of reducing human cognition load. Everything below this chunk is derived from these first four chunks. 4.6 Tweak Data loose scrap: Its best to rename the dataset (a) in a single place and (b) early in the pipeline, so the bad variable are never referenced. # OuhscMunge::column_rename_headstart(ds) # Help write `dplyr::select()` call. ds &lt;- ds %&gt;% dplyr::select( # `dplyr::select()` drops columns not included. subject_id, county_id, gender_id, race, ethnicity ) %&gt;% dplyr::mutate( ) %&gt;% dplyr::arrange(subject_id) # %&gt;% # tibble::rowid_to_column(&quot;subject_id&quot;) # Add a unique index if necessary 4.7 (Unique Content) This section represents all the chunks between tweak-data and verify-values. These chunks contain most of of the files creativity and contribution. In a sense, the structure of the first and last chunks allow these middle chunks to focus on concepts instead of plumbing. For simple files like the ellis of a metadata file, may not even need anything here. But complex analysis files may have 200+ lines distributed across a dozen chunks. We recommend that you create dedicate a chunk to each conceptual stage. If one starts to contain more than ~20 lines, consider if a more granular organization would clarify the codes intent. 
4.8 Verify Values Running OuhscMunge::verify_value_headstart(ds) will # ---- verify-values ----------------------------------------------------------- # Sniff out problems # OuhscMunge::verify_value_headstart(ds) checkmate::assert_integer( ds$county_month_id , any.missing=F , lower=1, upper=3080 , unique=T) checkmate::assert_integer( ds$county_id , any.missing=F , lower=1, upper=77 ) checkmate::assert_date( ds$month , any.missing=F , lower=as.Date(&quot;2012-06-15&quot;), upper=Sys.Date()) checkmate::assert_character(ds$county_name , any.missing=F , pattern=&quot;^.{3,12}$&quot; ) checkmate::assert_integer( ds$region_id , any.missing=F , lower=1, upper=20 ) checkmate::assert_numeric( ds$fte , any.missing=F , lower=0, upper=40 ) checkmate::assert_logical( ds$fte_approximated , any.missing=F ) checkmate::assert_numeric( ds$fte_rolling_median , any.missing=T , lower=0, upper=40 ) county_month_combo &lt;- paste(ds$county_id, ds$month) checkmate::assert_character(county_month_combo, pattern =&quot;^\\\\d{1,2} \\\\d{4}-\\\\d{2}-\\\\d{2}$&quot;, any.missing=F, unique=T) 4.9 Specify Output Columns This chunk: verifies these variables exist before uploading, documents (to troubleshooting developers) these variables are a product of the file, and reorders the variables to match the expected structure. Variable order is especially important for the database engines/drivers that ignore the variable name, and use only the variable position. We use the term slim because typically this output has fewer variables than the full dataset processed by the file. If you doubt the variable will be needed downstream, leave it in the dplyr::select(), but commented out. If someone needs it in the future, theyll easily determine where it might come from, and then uncomment the line (and possibly modify the database table). Once you import a column into a warehouse that multiple people are using, it can be tough to remove without breaking their code. 
This chunk follows verify-values because sometimes you want to check the validity of variables that are not consumed downstream. These variables are not important themselves, but an illegal value may reveal a larger problem with the dataset. # Print colnames that `dplyr::select()` should contain below: # cat(paste0(&quot; &quot;, colnames(ds), collapse=&quot;,\\n&quot;)) # Define the subset of columns that will be needed in the analyses. # The fewer columns that are exported, the fewer things that can break downstream. ds_slim &lt;- ds %&gt;% # dplyr::slice(1:100) %&gt;% dplyr::select( subject_id, county_id, gender_id, race, ethnicity ) ds_slim 4.10 Save to Disk or Database 4.11 Additional Resources (Colin Gillespie 2017), particularly the Efficient input/output chapter. H References "],["file-prototype-sql.html", "Chapter 5 Prototypical SQL File 5.1 Choice of Database Engine 5.2 Ferry 5.3 Default Databases 5.4 Declare Values Databases 5.5 Recreate Table 5.6 Truncate Table 5.7 INSERT INTO 5.8 SELECT 5.9 FROM 5.10 WHERE 5.11 ORDER BY 5.12 Indexing", " Chapter 5 Prototypical SQL File New data scientists typically import entire tables from a database into R, and then merge, filter, and groom the data.frames. A more efficient approach is to submit sql that executes on the database and returns a more specialized dataset. This provides several advantages: A database will be much more efficient when filtering and joining tables than any programing language, such as R or Python. A well-designed database will have indexed columns and other optimizations that surpass R and Python capabilities. A database handles datasets that are thousands of times larger than what R and Python can accommodate in RAM. For large datasets, database engines persist the data on a hard drive (instead of just RAM) and are optimized to read the necessary information into RAM the moment before it is needed, and then return the processed back to disk before progressing to the next block of data. 
Frequently, only a portion of the tables rows and columns are ultimately needed by the analysis. Reducing the size of the dataset leaving the database has two benefits: less information travels across the network and Rs and Pythons limited memory space is conserved. In some scenarios, it is desirable to use the INSERT SQL command to transfer data within the database; and never travel across the network and never touch R or your local machine. For our large and complicated projects, the majority of data movement uses INSERT commands within SQL files. Among these scenarios, the analysis-focused projects use R to call the sequence of SQL files (see flow.R), while the database-focused project uss SSIS. In both cases, we try to write the SQL files to conform to similar standards and conventions. As stated in Consistency across Files (and in the previous chapter), using a consistent file structure can (a) improve the quality of the code because the structure has been proven over time to facilitate good practices and (b) allow your intentions to be more clear to teammates because they are familiar with the order and intentions of the chunks. 5.1 Choice of Database Engine The major relational database engines use roughly the same syntax, but they all have slight deviations and enhancements beyond the SQL standards. Most of our databases are hosted by SQL Server, since that is what OUHSCs campus seems most comfortable supporting. Consequently, this chapter uses SQL Server 2017+ syntax. But like most data science teams, we still need to consume other databases, such as Oracle and MySQL. Outside OUHSC projects, we tend to use PostgreSQL and Redshift. 5.2 Ferry This basic sql file moves data within a database to create a table named dx, which is contained in the ley_covid_1 schema of the cdw_staging database. 
--use cdw_staging declare @start_date date = &#39;2020-02-01&#39;; -- sync with config.yml declare @stop_date date = dateadd(day, -1, cast(getdate() as date)); -- sync with config.yml DROP TABLE if exists ley_covid_1.dx; CREATE TABLE ley_covid_1.dx( dx_id int identity(1, 1) primary key, patient_id int not null, covid_confirmed bit not null, problem_date date null, icd10_code varchar(20) not null ); -- TRUNCATE TABLE ley_covid_1.dx; INSERT INTO ley_covid_1.dx SELECT pr.patient_id ,ss.covid_confirmed ,pr.invoice_date as problem_date ,pr.code as icd10_code -- into ley_covid_1.dx FROM cdw.star_1.fact_problem as pr inner join beasley_covid_1.ss_dx as ss on pr.code = ss.icd10_code WHERE pr.problem_date_start between @start_date and @stop_date and pr.patient_id is not null ORDER BY pr.patient_id, pr.problem_date_start desc CREATE INDEX ley_covid_1_dx_patient_id on ley_covid_1.dx (patient_id); CREATE INDEX ley_covid_1_dx_icd10_code on ley_covid_1.dx (icd10_code); 5.3 Default Databases We prefer not to specify the database of each table, and instead control it through the connection (such as the DSNs default database value). Nevertheless, its helpful to include the default database behind a comment for two reasons. First, it communicates to the default database to the human reader. Second, during debugging, the code can be highlighted in ADS/SSMS and executed with F5; this will mimic what happens when the file is run via automation with a DSN. --use cdw_staging 5.4 Declare Values Databases Similar to the Declare Globals chunk in a prototypical R file, values set at the top of the file are easy to read and modify. declare @start_date date = &#39;2020-02-01&#39;; -- sync with config.yml declare @stop_date date = dateadd(day, -1, cast(getdate() as date)); -- sync with config.yml 5.5 Recreate Table When batch-loading data, it is typically easiest drop and recreate a database table. 
In the snippet below, any table with the specific name is dropped/deleted from the database and replaced with a (possibly new) definition. We like to dedicate a line to each table column, with at least three elements per line: the name, the data type, and if nulls are allowed. Many other features and keywords are available when designing tables. The ones we occasionally use are: primary key helps database optimization when later querying the table, and enforces uniqueness, such as a patient table should not have any two rows with the same patient_id value. Primary keys must be nonmissing, so the not null keyword is redundant. unique is helpful when a table has additional columns that need to be unique (such as patient_ssn and patient_id). A more advanced scenario using a clustered columnar table, which is incompatible with the primary key designation. identity(1, 1) creates a 1, 2, 3,  sequence, which relieves the client of creating the sequence with something like row_number(). Note that when identity column exists, the number columns in the SELECT clause will be one fewer than the columns defined in CREATE TABLE. DROP TABLE if exists ley_covid_1.dx; CREATE TABLE ley_covid_1.dx( dx_id int identity(1, 1) primary key, patient_id int not null, covid_confirmed bit not null, problem_date date null, icd10_code varchar(20) not null ); To jump-start the creation of the table definition, we frequently use the INTO clause. This operation creates a new table, informed the column properties of the source tables. Within ADS and SSMS, refresh the list of tables and select the new table; there will be an option to copy the CREATE TABLE statement (similar to the snippet above) and paste it into the sql file. The definition can then be modified, such as tightening from null to not null. -- into ley_covid_1.dx 5.6 Truncate Table In scenarios where the table definition is stable and the data is refreshed frequently (say, daily), consider TRUNCATE-ing the table. 
When taking this approach, we prefer to keep the DROP and CREATE code in the file, but commented out. This saves development time in the future if the table definition needs to be modified. -- TRUNCATE TABLE ley_covid_1.dx; 5.7 INSERT INTO The INSERT INTO clause (when followed by a SELECT clause) simply moves data from the query into the specified table. The INSERT INTO clause transfers the columns in the exact order of the query. It does not try to match to the names of the destination table. An error will be thrown if the column types are mismatched (e.g., attempting to insert a character string into an integer value). Even worse, no error will be thrown if the mismatched columns have compatible types. This will occur if the table's columns are patient_id, weight_kg, and height_cm, but the query's columns are patient_id, height_cm, and weight_in. Not only will the weight and height be written to the incorrect columns, but the execution will not catch that the source is weight_in, but the destination is weight_kg. INSERT INTO ley_covid_1.dx 5.8 SELECT The SELECT clause specifies the desired columns. It can also rename columns and perform manipulations. We prefer to specify the aliased table of each column. If two source tables have the same column name, an error will be thrown regarding the ambiguity. Even if that's not a concern, we believe that explicitly specifying the source improves readability and reduces errors. SELECT pr.patient_id ,ss.covid_confirmed ,cast(pr.invoice_datetime as date) as problem_date ,pr.code as icd10_code 5.9 FROM FROM cdw.star_1.fact_problem as pr inner join beasley_covid_1.ss_dx as ss on pr.code = ss.icd10_code 5.10 WHERE The WHERE clause reduces the number of returned rows (as opposed to reducing the number of columns in the SELECT clause). Use the indentation level to communicate to the reader how the subclauses are combined. 
This is especially important if it both AND and OR operators are used, since their order of operations can be confused easily. WHERE pr.problem_date_start between @start_date and @stop_date and pr.patient_id is not null 5.11 ORDER BY The ORDER BY clause simply specifies the order of the rows. Be default, a columns values will be in ascending order, but can be descending if desired. ORDER BY pr.patient_id, pr.problem_date_start desc 5.12 Indexing If the table is large or queried in a variety of ways, indexing the table can speed up performance dramatically. CREATE INDEX ley_covid_1_dx_patient_id on ley_covid_1.dx (patient_id); CREATE INDEX ley_covid_1_dx_icd10_code on ley_covid_1.dx (icd10_code); "],["repo-prototype.html", "Chapter 6 Prototypical Repository 6.1 Root 6.2 Analysis 6.3 Data Public 6.4 Data Unshared 6.5 Documentation 6.6 Manipulation 6.7 Stitched Output 6.8 Utility", " Chapter 6 Prototypical Repository https://github.com/wibeasley/RAnalysisSkeleton 6.1 Root 6.1.1 config.R The configuration file is simply a plain-text yaml file read by the config package. It is great when a value has to be coordinated across multiple files. Also see the discussion of how we use the config file for excluding bad data values and of how the config file relates to yaml, json, and xml. default: # To be processed by Ellis lanes path_subject_1_raw: &quot;data-public/raw/subject-1.csv&quot; path_mlm_1_raw: &quot;data-public/raw/mlm-1.csv&quot; # Central Database (produced by Ellis lanes). path_database: &quot;data-public/derived/db.sqlite3&quot; # Analysis-ready datasets (produced by scribes &amp; consumed by analyses). path_mlm_1_derived: &quot;data-public/derived/mlm-1.rds&quot; # Metadata path_annotation: &quot;data-public/metadata/cqi-annotation.csv&quot; # Logging errors and messages from automated execution. 
path_log_flow: !expr strftime(Sys.time(), &quot;data-unshared/log/flow-%Y-%m-%d--%H-%M-%S.log&quot;) # time_zone_local : &quot;America/Chicago&quot; # Force local time, in case remotely run. # ---- Validation Ranges &amp; Patterns ---- range_record_id : !expr c(1L, 999999L) range_dob : !expr c(as.Date(&quot;2010-01-01&quot;), Sys.Date() + lubridate::days(1)) range_datetime_entry : !expr c(as.POSIXct(&quot;2019-01-01&quot;, tz=&quot;America/Chicago&quot;), Sys.time()) max_age : 25 pattern_mrn : &quot;^E\\\\d{9}$&quot; # An &#39;E&#39;, followed by 9 digits. 6.1.2 flow.R The workflow of the repo is determined by flow.R. It calls (typically R and SQL) files in a specific order, while sending the log messages to a file. See automation mediators for more details. 6.1.3 README.md 6.1.4 *.Rproj The Rproj file stores project-wide settings used by the RStudio IDE, such how trailing whitespaces are handled. The files major benefit is that it sets the R sessions working directory, which facilitates good discipline about setting a constant location for all files in the repo. Although the plain-text file can be edited directly, we recommend using RStudios dialog box. There is good documentation about Rproj settings. If you are unsure, copy this file to the repos root directory and rename it to match the repo exactly. 6.2 Analysis 6.3 Data Public Raw Derived Metadata Database Original 6.4 Data Unshared 6.5 Documentation 6.6 Manipulation 6.7 Stitched Output 6.8 Utility "],["rest.html", "Chapter 7 Data at Rest 7.1 Data States 7.2 Data Containers 7.3 Storage Conventions", " Chapter 7 Data at Rest 7.1 Data States Raw Derived Project-wide File on Repo Project-wide File on Protected File Server User-specific File on Protected File Server Project-wide Database Original 7.2 Data Containers 7.2.1 csv When exchanging data between two different systems, the preferred format is frequently plain text, where each cell in a record is separated by a comma. 
This is commonly called a csv—a comma separated value file. As opposed to proprietary formats like xlsx or sas7bdat, a csv file is easily opened and parsable by most statistical software, and even conventional text editors and GitHub. 7.2.2 rds 7.2.3 yaml, json, and xml yaml, json, and xml are three plain-text hierarchical formats commonly used when the data structure cannot be naturally represented by a rectangle or a set of rectangles (and therefore it is not a good fit for csv or rds). If you are unsure where to start with a nested dataset, see tidyr’s Rectangling vignette. In the same way we advocate for the simplest recoding function that is adequate for the task, we prefer yaml over json, and json over xml. Yaml accommodates most, but not all our needs. Initially it may be tricky to correctly use whitespacing to specify the correct nesting structure in yaml, but once you are familiar, the file is easy to read and edit, and the Git diffs can be quickly reviewed. The yaml package reads a yaml file, and returns a (nested) R list; it can also convert an R list into a yaml file. The config package wraps the yaml package to fill a common need: retrieving repository configuration information from a yaml file. We recommend using the config package when it fits. In some ways its functionality is a simplification of the yaml package, but it is an extension in other ways. For example, when a value follows !expr, R will evaluate the expression. We commonly specify the allowable ranges for variables in config.yml range_dob : !expr c(as.Date(&quot;2010-01-01&quot;), Sys.Date() + lubridate::days(1)) See the discussion of the config.yml in our prototypical repository, as well. 7.2.4 Arrow Apache Arrow is an open source specification that is developed to work with many languages such as R, Spark, Python, and many others. It accommodates nice rectangles where CSVs are used, and hierarchical nesting where json and xml are used. 
It is both an in-memory specification (which allows a Python process to directly access an R object), and an on-disk specification (which allows a Python process to read a saved R file). The file format is compressed, so it takes much less space to store on disk and less time to transfer over a network. Its downside is the file is not plain-text, but binary. That means the file is not readable and editable by as many programs, which hurts your project’s portability. You wouldn’t want to store most metadata files as arrow because then your collaborators couldn’t easily help you map the values to qqq 7.2.5 SQLite 7.2.6 Central Enterprise database 7.2.7 Central REDCap database 7.2.8 Containers to avoid 7.2.8.1 Spreadsheets Try not to receive data in Excel files. We think Excel can be useful for light brainstorming and prototyping equations, but it should not be trusted to transport serious information. Other spreadsheet software like LibreOffice Calc is less problematic in our experience, but still less desirable than the formats mentioned above. If you receive a csv and open it in a typical spreadsheet program, we strongly recommend that you do not save it, because of the potential for mangling values. After you close the spreadsheet, review the Git commits to verify no values were corrupted. See the appendix for a list of the ways your analyses can be undermined when receiving Excel files, as well as a template to correspond with your less-experienced colleague who is sending your team Excel files. 7.2.8.2 Proprietary Proprietary formats like SAS’s sas7bdat are less accessible to people without the current expensive software licenses. Therefore distributing proprietary file formats hurts reproducibility and decreases your project’s impact. On the other hand, using proprietary formats may be advantageous when you need to conceal the project’s failure. 
We formerly distributed sas7bdat files to supplement (otherwise identical) csvs, in order to cater to the surprisingly large population of SAS users who were unfamiliar with proc import or the Google search engine. Recently we have distributed only the csvs, with example code for reading the file from SAS. 7.3 Storage Conventions 7.3.1 All Sources Across all file formats, these conventions usually work best. consistency across versions: use a script to produce the dataset, and inform the recipient if the dataset’s structure changes. Most of our processes are automated, and changes that are trivial to humans (e.g., yyyy-mm-dd to mm/dd-yy) will break the automation. The specificity in our automation is intentional. We install guards on our processes so that bad values do not pass. For instance, we may place bounds on the toddler’s age at 12 and 36 months. We want our automation to break if the next dataset contains age values between 1 and 3 (years). Our downstream analysis (say, a regression model where age is a predictor variable) would produce misleading results if the shift between months and years went undetected. date format: specify as YYYY-MM-DD (ISO-8601) time format: specify as HH:MM or HH:MM:SS, preferably in 24-hour time. Use a leading zero from midnight to 9:59am, with a colon separating hours, minutes, and seconds (e.g., 09:59) patient names: separate the name_last, name_first, and name_middle as three distinct variables when possible. currency: represent money as an integer or floating-point variable. This representation is more easily parsable by software, and enables mathematical operations (like max() or mean()) to be performed directly. Avoid commas and symbols like $. If there is a possibility of ambiguity, indicate the denomination in the variable name (e.g., payment_dollars or payment_euros). 7.3.2 Text These conventions usually work best within plain-text formats. 
csv: comma separated values are the most common plain-text format, so they have better support than similar formats where cells are separated by tabs or semi-colons. However, if you are receiving a well-behaved file separated by these characters, be thankful and go with the flow. cells enclosed in quotes: a cell should be enclosed in double quotes, especially if it’s a string/character variable. 7.3.3 Excel As discussed above, avoid Excel. When that is not possible, these conventions help reduce ambiguity and corrupted values. See the appendix for our preferred approach to reading Excel files. avoid multiple tabs/worksheets: Excel files containing multiple worksheets are more complicated to read with automation, and they produce opportunities for inconsistent variables across tabs/worksheets. save the cells as text: this avoids Excel attempting to save cells as dates or numbers. Admittedly, this is a last-ditch effort. If someone is using Excel to convert cells to text, the values are probably already corrupted. 7.3.4 Meditech patient identifier: mrn_meditech instead of mrn, MRN Rec#, or Med Rec#. account/admission identifier: account_number instead of mrn, Acct#, or Account#. patient’s full name: name_full instead of Patient Name or Name. long/tall format: one row per dx per patient (up to 50 dxs) instead of 50 columns of dx per patient. Applies to diagnosis code &amp; description order date &amp; number procedure name &amp; number Meditech Idiosyncrasies: blood pressure: in most systems the bp_diastolic and bp_systolic values are stored in separate integer variables. In Meditech, they are stored in a single character variable, separated by a forward slash. 
7.3.5 Databases When exchanging data between two different systems,  "],["patterns.html", "Chapter 8 Patterns 8.1 Ellis 8.2 Arch 8.3 Ferry 8.4 Scribe 8.5 Analysis 8.6 Presentation -Static 8.7 Presentation -Interactive 8.8 Metadata", " Chapter 8 Patterns 8.1 Ellis 8.1.1 Purpose To incorporate an outside data source into your system safely. 8.1.2 Philosophy Without data immigration, all warehouses are useless. Embrace the power of fresh information in a way that is: repeatable when the data source is updated (and you have to refresh your warehouse) similar to other Ellis lanes (that are designed for other data sources) so you don’t have to learn/remember an entirely new pattern. (Like Rubik’s cube instructions.) 8.1.3 Guidelines Take small bites. Like all software development, don’t tackle all the complexity the first time. Start by processing only the important columns before incorporating more. Use only the variables you need in the short-term, especially for new projects. As everyone knows, the variables from the upstream source can change. Don’t spend effort writing code for variables you won’t need for a few months/years; they’ll likely change before you need them. After a row passes through the verify-values chunk, you’re accountable for any failures it causes in your warehouse. All analysts know that external data is messy, so don’t be surprised. Sometimes I’ll spend an hour writing an Ellis for 6 columns. Narrowly define each Ellis lane. One code file should strive to (a) consume only one CSV and (b) produce only one table. Exceptions include: if multiple input files are related, and really belong together (e.g., one CSV per month, or one CSV per clinic). This scenario is pretty common. if the CSV should legitimately produce two different tables after munging. This happens infrequently, such as when one warehouse table needs to be wide, and another long. 
8.1.4 Examples https://github.com/wibeasley/RAnalysisSkeleton/blob/master/manipulation/te-ellis.R https://github.com/wibeasley/RAnalysisSkeleton/blob/master/manipulation/ https://github.com/OuhscBbmc/usnavy-billets/blob/master/manipulation/survey-ellis.R 8.1.5 Elements Clear memory In scripting languages like R (unlike compiled languages like Java), its easy for old variables to hang around. Explicitly clear them before you run the file again. rm(list=ls(all=TRUE)) #Clear the memory of variables from previous run. This is not called by knitr, because it&#39;s above the first chunk. Load Sources In R, a source()d file is run to execute its code. We prefer that a sourced file only load variables (like function definitions), instead of do real operations like read a dataset or perform a calculation. There are many times that you want a function to be available to multiple files in a repo; there are two approaches we like. The first is collecting those common functions into a single file (and then sourcing it in the callers). The second is to make the repo a legitimate R package. The first approach is better suited for quick &amp; easy development. The second allows you to add documentation and unit tests. # ---- load-sources ------------------------------------------------------------ source(&quot;./manipulation/osdh/ellis/common-ellis.R&quot;) Load Packages This is another precaution necessary in a scripting language. Determine if the necessary packages are available on the machine. Avoiding attaching packages (with the library() function) when possible. Their functions dont need to be qualified (e.g., dplyr::intersect()) and could cause naming conflicts. Even if you can guarantee they dont conflict with packages now, packages could add new functions in the future that do conflict. 
# ---- load-packages ----------------------------------------------------------- # Attach these package(s) so their functions don&#39;t need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path library(magrittr , quietly=TRUE) library(DBI , quietly=TRUE) # Verify these packages are available on the machine, but their functions need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path requireNamespace(&quot;readr&quot; ) requireNamespace(&quot;tidyr&quot; ) requireNamespace(&quot;dplyr&quot; ) # Avoid attaching dplyr, b/c its function names conflict with a lot of packages (esp base, stats, and plyr). requireNamespace(&quot;testit&quot;) requireNamespace(&quot;checkmate&quot;) requireNamespace(&quot;OuhscMunge&quot;) # remotes::install_github(repo=&quot;OuhscBbmc/OuhscMunge&quot;) Declare Global Variables and Functions. This includes defining the expected column names and types of the data sources; use readr::cols_only() (as opposed to readr::cols()) to ignore any new columns that may be been added since the datasets last refresh. # ---- declare-globals --------------------------------------------------------- Load Data Source(s) See load-data chunk described in the prototypical file. # ---- load-data --------------------------------------------------------------- Tweak Data See tweak-data chunk described in the prototypical file. # ---- tweak-data -------------------------------------------------------------- Body of the Ellis Verify Specify Columns See specify-columns-to-upload chunk described in the prototypical file. # ---- specify-columns-to-upload ----------------------------------------------- Welcome into your warehouse. Until this chunk, nothing should be persisted. 
# ---- save-to-db -------------------------------------------------------------- # ---- save-to-disk ------------------------------------------------------------ 8.2 Arch 8.3 Ferry 8.4 Scribe 8.5 Analysis 8.6 Presentation -Static 8.7 Presentation -Interactive 8.8 Metadata Survey items can change across time (for justified and unjustified reasons). We prefer to dedicate a metadata csv to a single variable https://github.com/LiveOak/vasquez-mexican-census-1/issues/17#issuecomment-567254695 relationship_id code_2011 code_2016 relationship display_order description_2011 description_2016 1 1 1 Jefe(a) 1 Jefe(a) Jefe(a) 2 2 2 Esposo(a) o compañero(a) 2 Esposo(a) o compañero(a) Esposo(a) o compañero(a) 3 3 3 Hijo(a) 3 Hijo(a) Hijo(a) 4 4 4 Nieto(a) 4 Nieto(a) Nieto(a) 5 5 5 Yerno/nuera 5 Yerno/nuera Yerno/nuera 6 6 6 Hermano(a) 6 Hermano(a) Hermano(a) 7 7 NA Sobrino(a) 7 Sobrino(a) NA 8 8 NA Padre o madre 8 Padre o madre NA 9 9 NA Suegro(a) 9 Suegro(a) NA 10 10 NA Cuñado(a) 10 Cuñado(a) Cuñado(a) 11 11 7 Otros parientes 11 Otros parientes Otros parientes 12 12 8 No parientes 12 No parientes No parientes 13 13 9 Empleado(a) doméstico(a) 13 Empleado(a) doméstico(a) Empleado(a) doméstico(a) 99 99 NA No especificado 99 No especificado NA 8.8.1 Primary Rules for Mapping A few important rules are necessary to map concepts in this multidimensional space. each variable gets its own csv, such as relationship.csv (shown above), education.csv, living-status.csv, or race.csv. It’s easiest if this file name matches the variable. each variable also needs a unique integer that identifies the underlying level in the database, such as education_id, living_status_id, and relationship_id. each survey wave gets its own column within the csv, such as code_2011 and code_2016. each level within a variable-wave gets its own row, like Jefe, Esposo, and Hijo. 8.8.2 Secondary Rules for Mapping In this scenario, the first three columns are critical (i.e., relationship_id, code_2011, code_2016). 
Yet these additional guidelines will help the plumbing and manipulation of lookup variables. each variable also needs a unique name that identifies the underlying level for humans, such as education, living_status, and relationship. This is the human label corresponding to relationship_id. It’s easiest if this column name matches the variable. each survey wave gets its own column within the csv, such as description_2011 and description_2016. These are the human labels corresponding to variables like code_2011 and code_2016. each variable benefits from a unique display order value that will be used later in analyses. Categorical variables typically have some desired sequence in graph legends and tables; specify that order here. This helps define the factor levels in R or the pandas.Categorical levels in Python. Mappings are usually informed by outside documentation. For transparency and maintainability, clearly describe where the documentation can be found. One option is to include it in data-public/metadata/README.md. Another option is to include it at the bottom of the csv, preceded by a #, or some comment character that can keep the csv-parser from treating the notes like data it needs to squeeze into cells. Notes for this example are: # Notes,,,,,, # 2016 codes come from `documentation/2106/fd_endireh2016_dbf.pdf`, pages 14-15,,,,, # 2011 codes come from `documentation/2011/fd_endireh11.xls`, TSDem tab,,,,, sometimes a notes column helps humans keep things straight, especially researchers new to the field/project. In the example above, the notes value in the first row might be “jefe means head, not boss”. 
"],["security.html", "Chapter 9 Security &amp; Private Data 9.1 Security Guidelines 9.2 Dataset-level Redaction 9.3 Security for Data at Rest 9.4 File-level permissions 9.5 Database permissions 9.6 Public &amp; Private Repositories", " Chapter 9 Security &amp; Private Data Overview {Include a few paragraphs that describe principles and mentality, and how the following sections contribute.} The reports dataset(s) are preferably stored in REDCap or SQL Server. Theyre absolutely not stored not on GitHub or the local machine. Avoid Microsoft Access, Excel, CSVs, or anything without user accounts. If the PHI must be stored as a loose file (eg, CSV), keep it on the encrypted file server. Any PHI on a fileserver should be stored in a directory controlled by a fairly restrictive Windows AD group. Only ~4 people on a project probably need access to those files, not all ~20 people on a project. There are many benefits of SQL Server over CSVs or Excel files . Its protected by Odyssey (not just the VPN). It provides auditing logs. It provides schemas to further partition authorization. Real databases arent accidentally emailed or copied to an unsecured location. Transfer PHI into REDCap &amp; SQL Server as early as possible (particularly the CSVs &amp; XLSXs we regularly receive from partners). Temporary and derivative datasets are stored in SQL Server, not as a CSV on the fileserver. 9.1 Security Guidelines If you encounter a decision thats not described by this chapters the security practices, follow these underlying concepts. And of course, consult other people. Principle of least privilege: expose as little as possible. Limit the number of team members. Limit the amount of data (consider rows &amp; columns). Obfuscate values and remove unnecessary PHI in derivative datasets. Redundant layers of protection. A single point of failure shouldnt be enough to breach PHI security. Simplicity when possible. Store data in only two houses (eg, REDCap &amp; SQL Server). 
Easier to identify &amp; manage than a bunch of PHI CSVs scattered across a dozen folders, with versions. Manipulate your data programmatically, not manually. Your Windows AD account controls everything, indirectly or directly: VPN, Odyssey, file server, SQL, REDCap, &amp; REDCap API. Lock out team members when possible. It’s not that you don’t trust them with a lot of unnecessary data, it’s that you don’t trust their ex-boyfriends and their coffee shop hackers. 9.2 Dataset-level Redaction Several multi-layered strategies exist to prevent exposing PHI. One approach is simply to reduce the information contained in each variable. Much of the information in a medical record is not useful for modeling or descriptive statistics, and therefore can be omitted from downstream datasets. The techniques include: Remove the variable: An empty bucket has nothing to leak. Decrease the resolution: Many times, a patient’s year of birth is adequate for analysis, and including the month and day is an unnecessary risk. Hash and salt identifiers: use cryptographic-quality algorithms to transform an ID to a derived value. For example, 234 becomes 1432c1a399. The original value of 234 is not recoverable from 1432c1a399. But two rows with 1432c1a399 are still attributed to the same patient by the statistical model. 9.3 Security for Data at Rest The report’s dataset(s) are preferably stored in REDCap or SQL Server. They’re absolutely not stored on GitHub or the local machine. Avoid Microsoft Access, Excel, CSVs, or anything without user accounts. If the PHI must be stored as a loose file (eg, CSV), keep it on the encrypted file server. Any PHI on a fileserver should be stored in a directory controlled by a fairly restrictive Windows AD group. Only ~4 people on a project probably need access to those files, not all ~20 people on a project. There are many benefits of SQL Server over CSVs or Excel files. It’s protected by Odyssey (not just the VPN). It provides auditing logs. 
It provides schemas to further partition authorization. Real databases aren’t accidentally emailed or copied to an unsecured location. Transfer PHI into REDCap &amp; SQL Server as early as possible (particularly the CSVs &amp; XLSXs we regularly receive from partners). Temporary and derivative datasets are stored in SQL Server, not as a CSV on the fileserver. Hash values when possible. For instance, when we determine families/networks of people, we use things like SSNs. But the algorithm that identifies the clusters doesn’t need to know the actual SSN, just that two records have the same SSN. Something like a SHA-256 hash is good for this. The algorithm can operate on the hashed SSN just as effectively as the real SSN. However, the original SSN can’t be determined from its hashed value. If the table is accidentally exposed to the public, no PHI is compromised. The following two files help the hashing &amp; salting process: HashUtility.R and CreateSalt.R. 9.4 File-level permissions 9.5 Database permissions 9.6 Public &amp; Private Repositories 9.6.1 Repo Rules A code repository should be private, and restricted to only the necessary project members. The repo should be controlled by an OUHSC organization, and not by an individual’s private account. The .gitignore file prohibits common data file formats from being pushed/uploaded to the central repository. Examples: accdb, mdb, xlsx, csv, sas7bdat, rdata, RHistory. If you have a text file without PHI that must be on GitHub, create a new extension for it like *.PhiFree. Or you can include a specific exception to the .gitignore file, by adding an exclamation point in front of the file, such as !RecruitmentProductivity/RecruitingZones/ZipcodesToZone.csv. An example is included in the current repository’s .gitignore file (https://github.com/OuhscBbmc/RedcapExamplesAndPatterns/blob/master/.gitignore). 9.6.2 Scrubbing GitHub history Occasionally files may be committed to your git repository that need to be removed completely. 
Not just from the current collection of files (i.e., the branch’s head), but from the entire history of the repo. Scrubbing is typically required when (a) a sensitive file has been accidentally committed and pushed to GitHub, or (b) a huge file has bloated your repository and disrupted productivity. The two suitable scrubbing approaches both require the command line. The first is the git-filter-branch command within git, and the second is the BFG repo-cleaner. We use the second approach, which is recommended by GitHub; it requires 15 minutes to install and configure from scratch, but then is much easier to develop against, and executes much faster. The bash-centric steps below remove any files called monster-data.csv from the history of the bloated repository. If the file contains passwords, change them immediately. Delete monster-data.csv from your branch and push the commit to GitHub. Ask your collaborators to push any outstanding commits to GitHub and delete their local copy of the repo. Once scrubbing is complete, they will re-clone it. Download and install the most recent Java JRE from the Oracle site. Download the most recent jar file from the BFG site to the home directory. Clone a fresh copy of the repository in the user’s home directory. The --mirror argument avoids downloading every file, and downloads only the bookkeeping details required for scrubbing. cd ~ git clone --mirror https://github.com/your-org/bloated.git Remove all files (in any directory) called monster-data.csv. java -jar bfg-*.jar --delete-files monster-data.csv bloated.git Reflog and garbage collect the repo. cd bloated.git git reflog expire --expire=now --all &amp;&amp; git gc --prune=now --aggressive Push your local changes to the GitHub server. git push Delete the bfg jar from the home directory. cd ~ rm bfg-*.jar Ask your collaborators to re-clone the repo to their local machine. 
It is important they restart with a fresh copy, so the once-scrubbed file is not reintroduced into the repo’s history. If the file contains sensitive information, like passwords or PHI, ask GitHub to refresh the cache so the file’s history isn’t accessible through their website, even if the repo is private. 9.6.2.0.1 Resources BFG Repo-Cleaner site Additional BFG instructions GitHub Sensitive Data Removal Policy "],["automation.html", "Chapter 10 Automation &amp; Reproducibility 10.1 Mediator 10.2 Scheduling 10.3 Auxiliary Issues", " Chapter 10 Automation &amp; Reproducibility Automation is an important prerequisite of reproducibility. 10.1 Mediator A nontrivial project usually has multiple stages in its pipeline. Instead of a human deciding when to execute which piece, a single file should execute the pieces. The single file makes the project more portable, and also clearly documents the process. This single file is a special case of the mediator pattern, in the sense that it defines how the pieces relate to each other. 10.1.1 Flow File in R {Describe https://github.com/wibeasley/RAnalysisSkeleton/blob/master/flow.R.} See also the prototypical repo. 10.1.2 Makefile {Briefly describe this language, how it can be more efficient, and what additional obstacles it presents.} 10.1.3 SSIS {Describe SSIS package development.} 10.2 Scheduling 10.2.1 cron cron is the common choice when scheduling tasks on Linux. A plain text file specifies which file to run, and on what recurring schedule. A lot of helpful documentation and tutorials exist, as well as sites that help construct and validate your entries like crontab guru. 10.2.2 Task Scheduler Windows Task Scheduler is the common choice when scheduling tasks on Windows. Many of the GUI options are easy to specify, but three are error-prone, and must be specified carefully. They exist under Actions | Start a program. Program/script: is the absolute path to Rscript.exe. 
It needs to be updated every time you upgrade R (unless you’re doing something tricky with the PATH environmental OS variable). Notice we are using the patched version of R. The entry should be enclosed in quotes. &quot;C:\\Program Files\\R\\R-3.6.2patched\\bin\\Rscript.exe&quot; Add arguments (optional): specifies the flow file to run. In this case, the repo butcher-hearing-screen-1 is under the `Documents/cdw/` directory; the flow file is located in the repo’s root directory, as discussed in the prototypical repo. The entry should be enclosed in quotes. &quot;C:\\Users\\wbeasley\\Documents\\cdw\\butcher-hearing-screen-1\\flow.R&quot; Start in (optional): sets the working directory. If not properly set, the relative paths of the files will not point to the correct locations. It should be identical to the entry above, but (a) does not include /flow.R and (b) does NOT contain quotes. C:\\Users\\wbeasley\\Documents\\cdw\\butcher-hearing-screen-1 Other options we typically specify are: Run whether the user is logged in or not. Run as the highest available version of Windows. Wake the computer to run this task is probably necessary if this is located on a normal desktop. It is not something we specify, because our tasks are located on a VM-based workstation that is never turned off. Following these instructions, you are required to enter your password every time you modify the task, and every time you update your password. If you are using network credentials, you probably should specify your account like domain/username. Be careful: when you modify a task and are prompted for a password, the GUI subtly alters the account entry to just username (instead of domain/username). Make sure you prepend the username with the domain, as you enter the password. 10.2.3 SQL Server Agent SQL Server Agent executes jobs on a specified schedule. It also naturally interfaces with SSIS packages deployed to the server, but can also execute other formats, like a plain sql file. 
An important distinction is that it runs as a service on the database server, as opposed to Task Scheduler, which runs as a service on the client machine. We prefer running jobs on the server when the job either: requires elevated/administrative privileges (for instance, to access sensitive data), would require a lot of network constraints when passing large amounts of data between the server and client, or feels like it is the server’s responsibility, such as rebuilding a database index, or archiving server logs. 10.3 Auxiliary Issues The following subsections do not execute or schedule any code, but should be considered. 10.3.1 Sink Log Files {Describe how to sink output to a file that can be examined easily.} 10.3.2 Package Versions When a project runs repeatedly on a schedule without human intervention, errors can easily go undetected in simple systems. And when they are detected, the error messages may not be as clear as when you are running the procedure in RStudio. For these and other reasons, plan your strategy for maintaining the version of R and its packages. Here are some approaches, with different tradeoffs. For most conventional projects, we keep all packages up to date, and live with the occasional breaks. We stick to a practice of (a) run our daily workflow, (b) update the packages (and R &amp; RStudio if necessary), (c) rerun that same week’s workflow, and finally (d) verify that the results from a &amp; c are the same. If something is different, we have a day to adapt the pipeline code to the breaking changes in the packages. Before updating a package, read the NEWS file for changes that are not backwards-compatible (commonly called “breaking changes” in the news file). If the changes to the pipeline code are too difficult to complete in a day, we can roll back to a previous version with remotes::install_version(). On the other side of the spectrum, you can meticulously specify the desired version of each R package. 
This approach reduces the chance of a new version of a package breaking existing pipeline code. We recommend this approach when uptime is very important. The most intuitive implementation is to install with explicit code in a file like utility/install-dependencies.R: remotes::install_version(&quot;dplyr&quot; , version = &quot;0.4.3&quot; ) remotes::install_version(&quot;ggplot2&quot; , version = &quot;2.0.0&quot; ) remotes::install_version(&quot;data.table&quot;, version = &quot;1.10.4&quot;) remotes::install_version(&quot;lubridate&quot; , version = &quot;1.6.0&quot; ) remotes::install_version(&quot;openxlsx&quot; , version = &quot;4.0.17&quot;) # ... package list continues ... Another implementation is to convert the repo to a package itself, and specify the versions in the DESCRIPTION file. Imports: dplyr (== 0.4.3 ) ggplot2 (== 2.0.0 ) data.table (== 1.10.4) lubridate (== 1.6.0 ) openxlsx (== 4.0.17) A downside is that it can be difficult to set up an identical machine in a few months. Sometimes these packages depend on packages that are incompatible with other package versions. For example, at one point, the current version of dplyr was 0.4.3. A few months later, the rlang package (which wasn’t explicitly specified in the list of 42 packages) required at least version 0.8.0 of dplyr. The developer on the new machine needs to decide whether to upgrade dplyr (and test for breaking changes in the pipeline) or to install an older version of rlang. A second important downside is that this approach can lock all the user’s projects to specific outdated package versions. We and others5 advocate this approach when your team is experienced with only R, and has a machine dedicated to an important line-of-business workflow. When uptime is important and your team is experienced with other languages like Java, Python, or C#, consider if those would be better suited. A compromise between these two previous approaches is the renv package - R Environmentals. 
It is a successor to packrat. It requires some learning and cognitive overhead. But this investment becomes very appealing if (a) you were running hourly predictions and downtime is a big deal, or (b) your machine contains multiple projects that require different versions of the same package (such as dplyr 0.4.3 and dplyr 0.8.0). Chris Modzelewski "],["scaling-up.html", "Chapter 11 Scaling Up 11.1 Data Storage 11.2 Data Processing", " Chapter 11 Scaling Up 11.1 Data Storage Local File vs Conventional Database vs Redshift Usage Cases 11.2 Data Processing R vs SQL R vs Spark "],["collaboration.html", "Chapter 12 Parallel Collaboration 12.1 Social Contract 12.2 Code Reviews 12.3 Remote 12.4 Additional Resources 12.5 Loose Notes", " Chapter 12 Parallel Collaboration 12.1 Social Contract Issues Organized Commits &amp; Coherent Diffs Branch &amp; Merge Strategy 12.2 Code Reviews Daily Reviews of PRs Periodic Reviews of Files 12.3 Remote Headset &amp; sharing screens 12.4 Additional Resources (Colin Gillespie 2017), particularly the Efficient collaboration chapter. (Brian Fitzpatrick 2012) 12.5 Loose Notes 12.5.1 GitHub Review your diffs before committing. Check for things like accidental deletions and debugging code that should be deleted (or at least commented out). Keep chatter to a minimum, especially on projects with 3+ people being notified of every issue post. When encountering a problem, Take as much ownership as reasonable. Don't merely report there's an error. If you can't figure it out, ask the question and describe it well. what low-level file &amp; line of code threw the error. how you have tried to solve it. If there's a questionable line/chunk of code, trace its origin. Not for the sake of pointing the finger at someone, but for the sake of understanding its origin and history. 12.5.2 Common Code This involves code/files that multiple people use, like the REDCap arches. Run the file before committing it. 
Run common downstream files too (e.g., if you make a change to the arch, also run the funnel). If an upstream variable name must change, alert people. Post a GitHub issue to announce it. Tell everyone, and search the repo (ctrl+shift+f in RStudio) to alert specific people who might be affected. H References "],["document.html", "Chapter 13 Documentation 13.1 Team-wide 13.2 Project-specific 13.3 Dataset Origin &amp; Structure 13.4 Issues &amp; Tasks 13.5 Flow Diagrams 13.6 Setting up new machine", " Chapter 13 Documentation 13.1 Team-wide 13.2 Project-specific 13.3 Dataset Origin &amp; Structure 13.4 Issues &amp; Tasks 13.4.1 GitHub Issue Template If you are going to open up a repo/package to the public, consider creating a template for GitHub Issues that's tailored to the repo's unique characteristics. Furthermore, invite feedback from your userbase to improve the template. Here is our appeal in REDCapR that produced the Unexpected Behavior issue template: @nutterb @haozhu233, @rparrish, @sybandrew, and any one else, if you have time, please look at the new issue template that is customized for REDCapR/redcapAPI. I'd appreciate any feedback that could improve the experience for someone encountering a problem. I'd like something to (a) make it easier for the user to provide useful information with less effort and (b) make it easier for us to help more accurately with fewer back-and-forths. And if the template happens to help the user identify and solve the problem without creating the issue then I think everyone is happier too. I think the issue should leverage the Troubleshooter that 10+ people have contributed to. It should help locate the problematic area more quickly. @haozhu233, it seems you've liked the template in kableExtra. REDCapR is different in the sense it's more difficult to provide a minimal &amp; self-contained example to reproduce the problem. But with your experience with so many users and issues, I'd love any advice. 
@nutterb, I'd like this template to be helpful to redcapAPI too. There are only three quick find-and-replace occurrences of REDCapR -&gt; redcapAPI. And those were mostly to distinguish the R package from REDCap itself. 13.5 Flow Diagrams 13.6 Setting up new machine (example) "],["style.html", "Chapter 14 Style Guide 14.1 Readability 14.2 Datasets 14.3 Categorical Variables 14.4 Dates 14.5 Naming 14.6 Whitespace 14.7 Database 14.8 ggplot2", " Chapter 14 Style Guide Using a consistent style across your projects can increase the overhead as your data science team discusses options, decides on a good choice, and develops in compliant code. But like in most themes in this document, the cost is worth the effort. Unforced code errors are reduced when code is consistent, because mistake-prone styles are more apparent. For the most part, our team follows the tidyverse style. Here are some additional conventions we attempt to follow. Many of these were inspired by (Francesco Balena 2005). 14.1 Readability 14.1.1 Number The word number is ambiguous, especially in data science. Try for these more specific terms: count: the number of discrete objects or events, such as visit_count, pt_count, dx_count. id: a value that uniquely identifies an entity that doesn't change over time, such as pt_id, clinic_id, client_id, index: a 1-based sequence that's typically temporary, but unique within the dataset. For instance, pt_index 195 in Tuesday's dataset is likely a different person than pt_index 195 on Wednesday. On any given day, there is only one value of 195. tag: it is persistent across time like id, but typically created by the analysts and sent to the research team. See the snippet in the appendix for an example. tally: a running count duration: a length of time. Specify the units if it is not self-evident. physical and statistical quantities like depth, length, mass, mean, and sum. 14.1.2 Abbreviations Try to avoid abbreviations. 
Different people tend to shorten words differently; this variability increases the chance that people reference the wrong variable. At very least, it wastes time trying to remember if subject_number, subject_num, or subject_no was used. The Consistency section describes how this can reduce errors and increase efficiency. However, some terms are too long to reasonably use without shortening. We make some exceptions, such as the following scenarios: humans commonly use the term orally. For instance, people tend to say OR instead of operating room. your team has agreed on set list of abbreviations. The list for our CDW team includes: appt (not apt), cdw, cpt, drg (stands for diagnosis-related group), dx, hx, icd pt, and vr (vital records). When your team choose terms (e.g., apt vs appt), try to use a standard vocabulary, such as MedTerms Medical Dictionary. 14.2 Datasets 14.2.1 Filtering Rows Removing datasets rows is an important operation that is a frequent source of sneaky errors. These practices have hopefully reduced our mistakes and improved maintainability. 14.2.1.1 Dropping rows with missing values tidyr::drop_na() drops rows with a missing value in a specific column. # Good ds %&gt;% tidyr::drop_na(dob) is cleaner to read and write than these two styles. In particular, its easy to forget/overlook a !. # Worse ds %&gt;% dplyr::filter(!is.na(dob)) # Worst ds[!is.na(ds$dob), ] 14.2.1.2 Mimic number line When ordering quantities, go smallest-to-largest as you type left-to-right. 14.2.1.3 Searchable verbs Youve probably asked in frustration, Where did all the rows go? I had 1,000 in the middle of the file, but now have only 782. Try to keep a consistent tools for filtering, so you can ctrl+f only a handful of terms, such as filter, drop_na, and summarize/summarise. Its more difficult to highlight the When using the base Rs filtering style, (e.g., ds &lt;- ds[4 &lt;= ds$count, ]). 
14.2.2 Dont attach As the Google Stylesheet says, The possibilities for creating errors when using attach() are numerous. 14.3 Categorical Variables There are lots of names for a categorical variable across the different disciplines (e.g., factor, categorical, ). 14.3.1 Explicit Missing Values Define a level like \"unknown\" so the data manipulation doesnt have to test for both is.na(x) and x==\"unknown\". The explicit labels also helps when included in a statistical procedure and coefficient table. 14.3.2 Granularity Sometimes it helps to represent the values differently, say a granular and a coarse way. We say cut7 or cut3 to denotes the number of levels; this is related to base::cut(). unknown and other are frequently levels, and they count toward the quantity. # Inside a dplyr::mutate() clause education_cut7 = dplyr::recode( education_cut7, &quot;No Highschool Degree / GED&quot; = &quot;no diploma&quot;, &quot;High School Degree / GED&quot; = &quot;diploma&quot;, &quot;Some College&quot; = &quot;some college&quot;, &quot;Associate&#39;s Degree&quot; = &quot;associate&quot;, &quot;Bachelor&#39;s Degree&quot; = &quot;bachelor&quot;, &quot;Post-graduate degree&quot; = &quot;post-grad&quot;, &quot;Unknown&quot; = &quot;unknown&quot;, .missing = &quot;unknown&quot;, ), education_cut3 = dplyr::recode( education_cut7, &quot;no diploma&quot; = &quot;no bachelor&quot;, &quot;diploma&quot; = &quot;no bachelor&quot;, &quot;some college&quot; = &quot;no bachelor&quot;, &quot;associate&quot; = &quot;no bachelor&quot;, &quot;bachelor&quot; = &quot;bachelor&quot;, &quot;post-grad&quot; = &quot;bachelor&quot;, &quot;unknown&quot; = &quot;unknown&quot;, ), education_cut7 = factor(education_cut7, levels=c( &quot;no diploma&quot;, &quot;diploma&quot;, &quot;some college&quot;, &quot;associate&quot;, &quot;bachelor&quot;, &quot;post-grad&quot;, &quot;unknown&quot; )), education_cut3 = factor(education_cut3, levels=c( &quot;no bachelor&quot;, &quot;bachelor&quot;, 
&quot;unknown&quot; )) 14.4 Dates yob is an integer, but mob and wob are dates. Typically months are collapsed to the 15th day and weeks are collapsed to Monday, which are the defaults of OuhscMunge::clump_month_date() and OuhscMunge::clump_week_date(). These help obfuscate the real value, if PHI is involved. Months are centered because the midpoint is usually a better representation of the month's performance than the month's initial day. birth_month_index can be values 1 through 12, while birth_month (or commonly mob) contains the year (e.g., 2014-07-15). Don't use the minus operator (i.e., -). See Defensive Date Arithmetic. 14.5 Naming 14.5.1 Variables This builds upon the tidyverse style guide for objects. 14.5.1.1 Characters Use lowercase letters, using underscores to separate words. Avoid uppercase letters and periods. 14.5.1.2 Lexigraphical Sorting For variables including multiple nouns or adjectives, use lexigraphical sorting. The bigger term goes first. # Good: parent_name_last parent_name_first parent_dob kid_name_last kid_name_first kid_dob # Bad: last_name_parent first_name_parent dob_parent last_name_kid first_name_kid dob_kid Large datasets with multiple questionnaires (each with multiple subsections) are much more manageable when the variables follow a lexigraphical order. SELECT asq3_medical_problems_01 ,asq3_medical_problems_02 ,asq3_medical_problems_03 ,asq3_behavior_concerns_01 ,asq3_behavior_concerns_02 ,asq3_behavior_concerns_03 ,asq3_worry_01 ,asq3_worry_02 ,asq3_worry_03 ,wai_01_steps_beneficial ,wai_02_hv_useful ,wai_03_parent_likes_me ,wai_04_hv_doubts ,hri_01_client_input ,hri_02_problems_discussed ,hri_03_addressing_problems_clarity ,hri_04_goals_discussed FROM miechv.gpav_3 14.5.2 Files and Folders Naming files and their folders/directories follows the style of naming variables, with one small difference: separate words with dashes (i.e., -), not underscores (i.e., _). 
Infrequently, we'll use a dash if it helps identify a noun (that already contains an underscore). For instance, if there's a table called patient_demographics, we might call the files patient_demographics-truncate.sql and patient_demographics-insert.sql. Using lower case is important because some databases and operating systems are case-sensitive, and some are case-insensitive. To promote portability, keep everything lowercase. Again, file and folder names should contain only (a) lowercase letters, (b) digits, (c) dashes, and (d) an occasional underscore. Do not include spaces, uppercase letters, and especially punctuation, such as : or (. 14.5.3 Datasets data.frames are used in almost every analysis file, so we put extra effort into formulating conventions that are informative and consistent. Naming datasets follows the style of naming variables, with a few additional features. In the R world, dataset is typically a synonym of data.frame, a rectangular structure of rows and columns. The database equivalent of a conventional table. Note that dataset means a collection of tables in the .NET world, and a collection of (not-necessarily-rectangular) files in Dataverse.6 14.5.3.1 Prefix with ds_ and d_ Datasets are handled so differently than other variables that we find it's easier to identify its type and scope. The prefix ds_ indicates the dataset is available to the entire file, while d_ indicates the scope is localized to a function. count_elements &lt;- function (d) { nrow(d) * ncol(d) } ds &lt;- mtcars count_elements(d = ds) 14.5.3.2 Express the grain The grain of a dataset describes what each row represents, which is a similar idea to the statistician's concept of unit of analysis. Essentially it is the most granular entity described. Many miscommunications and silly mistakes are avoided when your team is disciplined enough to define a tidy dataset with a clear grain. 
ds_student # One row per student ds_teacher # One row per teacher ds_course # One row per course ds_course_student # One row per student-course combination ds_pt # One row per patient ds_pt_visit # One row per patient-visit combination ds_visit # Same as above, since it&#39;s clear a visit is connected w/ a pt For more insight into grains, Ralph Kimball writes In debugging literally thousands of dimensional designs from my students over the years, I have found that the most frequent design error by far is not declaring the grain of the fact table at the beginning of the design process. If the grain isnt clearly defined, the whole design rests on quicksand. Discussions about candidate dimensions go around in circles, and rogue facts that introduce application errors sneak into the design.  I hope youve noticed some powerful effects from declaring the grain. First, you can visualize the dimensionality of the doctor bill line item very precisely, and you can therefore confidently examine your data sources, deciding whether or not a dimension can be attached to this data. For example, you probably would exclude treatment outcome from this example because most medical billing data doesnt tie to any notion of outcome. 14.5.3.3 Singular table names If you adopt the style that the tables name reflects the grain, this is a corollary. If the grain is singular like one row per client or one row per building, the name should be ds_client and ds_building (not ds_clients and ds_buildings). If these datasets are saved to a database, the tables are called client and building. Table names are plural when the grain is plural. If a record has field like client_id, date_birth, date_graduation and date_death, I suggest called the table client_milestones (because a single row contains three milestones). This Stack Overflow post presents a variety of opinions and justifications when adopting a singular or plural naming scheme. 
I think its acceptable if the R vectors follow a different style than R data.frames. For instance, a vector can have a plural name even though each element is singular (e.g., client_ids &lt;- c(10, 24, 25)). 14.5.3.4 Use ds when definition is clear Many times an ellis file handles with only one incoming csv and outgoing dataset, and the grain is obvious typically because the ellis filename clearly states the grain. 14.5.3.5 Use an adjective after the grain, if necessary If the same R file is manipulating two datasets with the same grain, qualify their differences after the grain, such as ds_client_all and ds_client_michigan. Adjectives commonly indicate that one dataset is a subset of another. An occasional limitation with our naming scheme is that the difficult to distinguish the grain from the adjective. For instance, is the grain of ds_student_enroll either (a) every instance of a student enrollment (i.e., student and enroll both describe the grain) or (b) the subset of students who enrolled (i.e., student is the grain and enroll is the adjective)? Its not clear without examine the code, comments, or documentation. If someone has a proposed solution, we would love to hear it. So far, weve been reluctant to decorate the variable name more, such as ds_grain_client_adj_enroll. 14.5.3.6 Define the dataset when in doubt If its potentially unclear to a new reader, use a comment immediately before the datasets initial use. # `ds_client_enroll`: # grain: one row per client # subset: only clients who have successfully enrolled are included # source: the `client` database table, where `enroll_count` is 1+. ds_client_enroll &lt;- ... 14.5.4 Semantic sorting Put the biggest term on the left side of the variable. 14.6 Whitespace Although execution is rarely affected by whitespace in R and SQL files, be consistent and minimalistic. One benefit is that Git diffs wont show unnecessary churn. 
When a line of code lights up in a diff, it's nice when it reflects a real change, and not something trivial like tabs were converted to spaces, or trailing spaces were added or deleted. Some of these guidelines are handled automatically by modern IDEs, if you configure the correct settings. Tabs should be replaced by spaces. Most modern IDEs have an option to do this for you automatically. (RStudio calls this Insert spaces for tabs.) Indentions should be replaced by a consistent number of spaces, depending on the file type. R: 2 spaces SQL: 2 spaces Python: 4 spaces Each file should end with a blank line. (RStudio calls this Ensure that source files end with newline.) Remove spaces and tabs at the end of lines. (RStudio calls this Strip trailing horizontal whitespace when saving.) 14.7 Database GitLab's data team has a good style guide for databases and sql that's fairly consistent with our style. Some important similarities and differences are Favor CTEs The name of the primary key should typically contain the table. In the employee table, the key should be employee_id, not id. 14.8 ggplot2 The expressiveness of ggplot2 allows someone to quickly develop precise scientific graphics. One graph can be specified in many equivalent styles, which increases the opportunity for confusion. We formalized much of this style while writing a textbook for introductory statistics (Lise DeShea (2015)); the 200+ graphs and their code are publicly available. There are a few additional ggplot2 tips in the tidyverse style guide. 14.8.1 Order of commands ggplot2 is essentially a collection of functions combined with the + operator. Publication graphs commonly require at least 20 functions, which means the functions can sometimes be redundant or step on each other's toes. The family of functions should follow a consistent order ideally starting with the more important structural functions and ending with the cosmetic functions. 
Our preference is: ggplot() is the primary function to specify the default dataset and aesthetic mappings. Many arguments can be passed to aes(), and we prefer to follow an order consistent with the scale_*() order below. geom_*() and annotate() creates the geometric elements that represent the data. Unlike most categories in this list, the order matters. Geoms specified first are drawn first, and therefore can be obscured by subsequent geoms. scale_*() describes how a dimension of data (specified in aes()) is translated into a visual element. We specify the dimensions in descending order of (typical) importance: x, y, group, color, fill, size, radius, alpha, shape, linetype. coord_*() facet_*() and label_*() guides() theme() (call the big themes like theme_minimal() before overriding the details like theme(panel.grid = element_line(color = \"gray\"))) labs() 14.8.2 Gotchas Here are some common mistakes we see not-so-infrequently (even sometimes in our own code). 14.8.2.1 Zooming Call coord_*() to restrict the plotted x/y values, not scale_*() or lims()/xlim()/ylim(). coord_*() zooms in on the axes, so extreme values essentially fall off the page; in contrast, the latter three functions essentially remove the values from the dataset. The distinction does not matter for a simple bivariate scatterplot, but likely will mislead you and the viewer in two common scenarios. First, a call to geom_smooth() (e.g., that overlays a loess regression curve) ignore the extreme values entirely; consequently the summary location will be misplaced and its standard errors too tight. Second, when a line graph or spaghetti plots contains an extreme value, it is sometimes desirable to zoom in on the the primary area of activity; when calling coord_*(), the trend line will leave and return to the plotting panel (which implies points exist which do not fit the page), yet when calling the others, the trend line will appear interrupted, as if the extreme point is a missing value. 
14.8.2.2 Seed When jittering, set the seed in the declare-globals chunk so that rerunning the report wont create a (slightly) different png. The insignificantly different pngs will consume extra space in the Git repository. Also, the GitHub diff will show the difference between png versions, which requires extra cognitive load to determine if the difference is due solely to jittering, or if something really changed in the analysis. H References "],["publication.html", "Chapter 15 Publishing Results 15.1 To Other Analysts 15.2 To Researchers &amp; Content Experts 15.3 To Technical-Phobic Audiences", " Chapter 15 Publishing Results 15.1 To Other Analysts 15.2 To Researchers &amp; Content Experts 15.3 To Technical-Phobic Audiences "],["testing-and-validation.html", "Chapter 16 Testing, Validation, &amp; Defensive Programming 16.1 Testing Functions 16.2 Defensive Programming 16.3 Validator", " Chapter 16 Testing, Validation, &amp; Defensive Programming 16.1 Testing Functions 16.2 Defensive Programming Throwing errors 16.3 Validator Benefits for Analysts Benefits for Data Collectors "],["troubleshooting.html", "Chapter 17 Troubleshooting and Debugging 17.1 Finding Help 17.2 Debugging", " Chapter 17 Troubleshooting and Debugging 17.1 Finding Help Within your group (eg, Thomas and REDCap questions) Within your university (eg, SCUG) Outside (eg, Stack Overflow; GitHub issues) 17.2 Debugging traceback(), browser(), etc "],["workstation.html", "Chapter 18 Workstation 18.1 Required Installation 18.2 Recommended Installation 18.3 Optional Installation 18.4 Asset Locations 18.5 Administrator Installation 18.6 Installation Troubleshooting 18.7 Ubuntu Installation 18.8 Retired Tools", " Chapter 18 Workstation We believe it is important to keep software updated and consistent across workstations in your project. This material was originally posted at https://github.com/OuhscBbmc/RedcapExamplesAndPatterns/blob/master/DocumentationGlobal/ResourcesInstallation.md. 
It should help establish our tools on a new development computer. 18.1 Required Installation The installation order matters. 18.1.1 R R is the centerpiece of the analysis. Every few months, youll need to download the most recent version. {added Sept 2012} 18.1.2 RStudio RStudio Desktop is the IDE (integrated design interface) that youll use to interact with R, GitHub, Markdown, and LaTeX. Updates can be checked easily through the menus Help -&gt; Check for updates. 18.1.3 Installing R Packages Dozens of R Packages will need to be installed. Choose between one of the two related scripts. It will install from our list of packages that our data analysts typically need. The script installs a package only if its not already installed; also an existing package is updated if a newer version is available. Create a new personal library if it prompts you. It takes at least fifteen minutes, so start it before you go to lunch. The list of packages will evolve over time, so please help keep the list updated. To install our frequently-used packages, run the following snippet. The first lines installs an important package. The second line calls the online Gist, which defines the package_janitor_remote() function. The final line calls the function (and passes a specific CSV of packages)7. if (!base::requireNamespace(&quot;devtools&quot;)) utils::install.packages(&quot;devtools&quot;) devtools::source_gist(&quot;2c5e7459b88ec28b9e8fa0c695b15ee3&quot;, filename=&quot;package-janitor-bbmc.R&quot;) package_janitor_remote( &quot;https://raw.githubusercontent.com/OuhscBbmc/RedcapExamplesAndPatterns/master/utility/package-dependency-list.csv&quot; ) Some of our projects require specialized packages that are not typically used. In these cases, we will develop the git repo as an R package that includes a proper DESCRIPTION file. See RAnalysisSkeleton for an example. 
When the project is opened in RStudio, update_packages_addin() in OuhscMunge will find the DESCRIPTION file and install the package dependencies. if( !base::requireNamespace(&quot;remotes&quot; ) ) utils::install.packages(&quot;remotes&quot;) if( !base::requireNamespace(&quot;OuhscMunge&quot;) ) remotes::install_github(&quot;OuhscBbmc/OuhscMunge&quot;) OuhscMunge::update_packages_addin() 18.1.4 Updating R Packages Several R packages will need to be updated every few weeks. Unless you have been told not to (because it would break something -this is rare), periodically update the packages by executing the following code update.packages(checkBuilt=TRUE). 18.1.5 GitHub GitHub registration is necessary to push modified files to the repository. First, register a free user account, then tell the repository owner your exact username, and they will add you as a collaborator (e.g., to https://github.com/OuhscBbmc/RedcapExamplesAndPatterns). 18.1.6 GitHub Desktop GitHub Desktop does the basic tasks a little easier than the git features built into RStudio. This client is available for Windows and macOS. (Occasionally, someone might need to use git from the command line to fix problems, but this is not required to start.) 18.1.7 R Tools R Tools for Windows is necessary to build some packages in development hosted on GitHub. If running Linux, the components of R Tools are likely already installed on your machine. {added Feb 2017} 18.2 Recommended Installation The installation order does not matter. 18.2.1 ODBC Driver ODBC Driver for SQL Server is for connecting to the token server, if your institution is using one. As of this writing, version 17 is the most recent driver version. See if a new one exists. {updated Apr 2018} 18.2.2 Notepad++ Notepad++ is a text editor that allows you to look at the raw text files, such as code and CSVs. For CSVs and other data files, it is helpful when troubleshooting (instead of looking at the file through Excel, which masks &amp; causes some issues). 
{added Sept 2012} 18.2.3 Azure Data Studio Azure Data Studio (ADS) is now recommended by Microsoft and others for analysts (and some other roles) ahead of SQL Server Management Studio. Note: here are some non-default changes that facilitate our workflow. Settings | Text Editor | Tab Size: 2 {\"editor.tabSize\": 2} Settings | Text Editor | Detect Indentation: uncheck {\"editor.detectIndentation\": false} Settings | Text Editor | Insert Final Newlines: check {\"files.insertFinalNewline\": true} Settings | Text Editor | Trim Final Newlines: check {\"files.trimFinalNewlines\": true} Settings | Text Editor | Trim Trailing Whitespace: check {\"files.trimTrailingWhitespace\": true} Data | Sql | Show Connection Info In Title: uncheck {\"sql.showConnectionInfoInTitle\": false} Data | Sql | Copy Include Headers: check {\"sql.copyIncludeHeaders\": true} 18.2.4 Visual Studio Code Visual Studio Code is an extensible text editor that runs on Windows and Linux, similar to Atom (described above). Its much lighter than the full Visual Studio. Like Atom, it supports browsing through the directory structure, replacing across files, interaction with git, and previewing markdown. Currently, it supports searching CSVs better than Atom. Productivity is enhanced with the following extensions: {added Dec 2018} Excel Viewer isnt a good name, but Ive liked the capability. It displays CSVs and other files in a grid. {added Dec 2018} Rainbow CSV color codes the columns, but still allows you to see and edit the raw plain-text file. {added Dec 2018} SQL Server allows you to execute against a database, and view/copy/save the grid results. It doesnt replicate all SSMS features, but is nice as your scanning through files. {added Dec 2018} Code Spell Checker produces green squiggly lines under words not in its dictionary. You can add words to your user dictionary, or a project dictionary. Markdown All in One has some useful markdown capabilities, such as converting the file to html. 
Markdown PDF has some useful markdown capbilities, such as converting the file to pdf. markdownlint has linting and style checking. These extensions can be installed by command line. code --list-extensions code --install-extension GrapeCity.gc-excelviewer code --install-extension mechatroner.rainbow-csv code --install-extension ms-mssql.mssql code --install-extension streetsidesoftware.code-spell-checker code --install-extension yzhang.markdown-all-in-one code --install-extension yzane.markdown-pdf code --install-extension DavidAnson.vscode-markdownlint Note: here are some non-default changes that facilitate our workflow. Either copy this configuration into settings.json, or manually specify the options with the settings editor. { &quot;diffEditor.ignoreTrimWhitespace&quot;: false, &quot;diffEditor.maxComputationTime&quot;: 0, &quot;editor.acceptSuggestionOnEnter&quot;: &quot;off&quot;, &quot;editor.renderWhitespace&quot;: &quot;all&quot;, &quot;explorer.confirmDragAndDrop&quot;: false, &quot;files.associations&quot;: { &quot;*.Rmd&quot;: &quot;markdown&quot; }, &quot;files.trimFinalNewlines&quot;: true, &quot;files.trimTrailingWhitespace&quot;: true, &quot;git.autofetch&quot;: true, &quot;git.confirmSync&quot;: false, &quot;window.zoomLevel&quot;: 2, &quot;markdown.extension.orderedList.autoRenumber&quot;: false, &quot;markdown.extension.orderedList.marker&quot;: &quot;one&quot;, &quot;markdownlint.config&quot;: { &quot;MD003&quot;: { &quot;style&quot;: &quot;setext_with_atx&quot; }, &quot;MD007&quot;: { &quot;indent&quot;: 2 }, &quot;MD022&quot;: { &quot;lines_above&quot;: 1, &quot;lines_below&quot;: 1 }, &quot;MD024&quot;: { &quot;siblings_only&quot;: true }, &quot;no-bare-urls&quot;: false, &quot;no-inline-html&quot;: { &quot;allowed_elements&quot;: [ &quot;mermaid&quot;, &quot;a&quot;, &quot;img&quot; ] } } } Settings | Extensions |Markdown All in One | Ordered List | Auto Renumber: false {\"markdown.extension.orderedList.autoRenumber\": false} Settings | 
Extensions |Markdown All in One | Ordered List | Marker: one {\"markdown.extension.orderedList.marker\": \"one\"} 18.3 Optional Installation The installation order does not matter. 18.3.1 Git Git command-line utility enables some advanced operations that the GitHub client doesn't support. Use the default installation options, except these preferences of ours: 1. Nano is the default text editor. 18.3.2 LibreOffice Calc LibreOffice Calc is an alternative to Excel. Unlike Excel, it doesn't guess much with formatting (which usually messes things up, especially dates). 18.3.3 pandoc pandoc converts files from one markup format into another. {added Sept 2012} 18.3.4 Python Python is used by some analysts. The prototypical installation involves two options. Anaconda, which includes Jupyter Notebooks, Jupyter Lab, and Spyder. Plus two programs that are already on this list: RStudio and VS Code. In Windows, open Anaconda Prompt with administrative privileges conda install numpy pandas scikit-learn matplotlib Standard Python, while installing packages through pip3 in the terminal. If the pip3 command is unrecognized because it's missing from the OS path variable, an alternative is py -3 -mpip install pysftp; this calls pip through the py command which is sometimes in the path variable after installation. 18.4 Asset Locations GitHub repository https://github.com/OuhscBbmc/RedcapExamplesAndPatterns {added Sept 2012} File server directory Ask your PI. For Peds, it's typically on the S drive. SQL Server Database Ask Thomas, Will or David REDCap database Ask Thomas, Will or David. It is an http URL, and we're trying not to publicize its value. ODBC UserDsn The name depends on your specific repository, and SQL Server database. Ask Thomas, Will or David for how to set it up. 18.5 Administrator Installation These programs are useful to people administrating servers, but not to the typical data scientist. 18.5.1 MySQL Workbench MySQL Workbench is useful occasionally for REDCap admins. 
18.5.2 Postman Postman Native App is useful for developing with the API and has replaced the Chrome app. If thats not possible, a web client is available as well. With either program, do not access any PHI. 18.5.3 SQL Server Management Studio (SSMS) SQL Server Management Studio has been replaced by Azure Data Studio for some roles, but is still recommended for database administrators. It is an easy way to access the database and write queries (and transfer the SQL to an R file). Its not required for the REDCap API, but its usually necessary when integrating REDCap with other databases. Note: here are some non-default changes that facilitate our workflow. The first two help when we save the database structure (not data) on GitHub, so we can easily track/monitor the structural changes over time. The tabs options keeps things consistent between editors. In the SSMS Tools | Options dialog box: SQL Server Object Explorer | Scripting | Include descriptive headers: False SQL Server Object Explorer | Scripting | Script extended properties: False Text Editor | All Languages | Tabs | Tab size: 2 Text Editor | All Languages | Tabs | Indent size: 2 Text Editor | All Languages | Tabs | Insert Spaces: true These dont affect the saved files, but make life easier. The first makes the result font bigger. Environment | Fonts and Colors | Show settings for: Grid Results | Size: 10 Query Results | SQL Server | Results to Grid | Include column headers when copying or saving the results: false` Designers | Table and Database Designers | Prevent saving changes that require table-recreation: false Text Editor | Editor Tab and Status Bar | Tab Text | Include Server Name: false Text Editor | Editor Tab and Status Bar | Tab Text | Include Database Name: false Text Editor | Editor Tab and Status Bar | Tab Text | Include Login Name: false Text Editor | All Languages | General | Line Numbers: true A dark theme is unofficially supported in SSMS 18. 
If you have write privileges in the Program Files directory, a quick modification to a config file will reduce eye strain. This change also prevents your screen from flashing dark-to-light-to-dark, which broadcasts your wandering attention during a Zoom meeting. For more details, see setting-up-dev-machine.md (in a private repo that's restricted to BBMC members). 18.5.4 WinSCP WinSCP is a GUI for SCP and SFTP file transfer using SSH keys. The tool is occasionally useful for admins when collaborating with other institutions or other OU computing resources. Because PHI can accidentally be sent to collaborators without a DUA, we recommend that WinSCP be installed only by informed administrators. The typical data scientist on our teams does not need this tool. 18.6 Installation Troubleshooting Git: Will Beasley resorted to this workaround Sept 2012: http://stackoverflow.com/questions/3431361/git-for-windows-the-program-cant-start-because-libiconv2-dll-is-missing. And then he copied the following four files from D:/Program Files/msysgit/mingw/bin/ to D:/Program Files/msysgit/bin/: (1) libiconv2.dll, (2) libcurl-4.dll, (3) libcrypto.dll, and (4) libssl.dll. (If you install to the default location, you'll move instead from C:/msysgit/mingw/bin/ to C:/msysgit/bin/) {added Sept 2012} Git: On a different computer, Will Beasley couldn't get RStudio to recognize msysGit, so installed the Full installer for official Git for Windows 1.7.11 from (http://code.google.com/p/msysgit/downloads/list) and switched the Git Path in the RStudio Options. {added Sept 2012} RStudio If something goes wrong with RStudio, re-installing might not fix the issue, because your personal preferences aren't erased. To be safe, you can be thorough and delete the equivalent of C:\\Users\\wibeasley\\AppData\\Local\\RStudio-Desktop\\. 
The options settings are stored (and can be manipulated) in this extentionless text file: C:\\Users\\wibeasley\\AppData\\Local\\RStudio-Desktop\\monitored\\user-settings\\user-settings. {added Sept 2012} 18.7 Ubuntu Installation Ubuntu desktop 19.04 follows these instructions for the R and RStudio and required these debian packages to be installed before the R packages. The --yes option avoids manual confirmation for each line, so you can copy &amp; paste this into the terminal. Add the following to the sources with sudo nano /etc/apt/sources.list. The eoan version may be updated; The metrocast part could be modified too from this list. I found it worked better for a new Ubuntu release than cloud.r-project.org. # For R 4.0 deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ deb http://mirror.genesisadaptive.com/ubuntu/ focal-backports main restricted universe # For R 3.5 &amp; #.6 deb https://cloud.r-project/bin/linux/ubuntu/ eoan-cran35/ deb-src https://cloud.r-project/bin/linux/ubuntu/ eoan-cran35/ deb http://mirror.metrocast.net/ubuntu/ eoan-backports main restricted universe This next block can be copied and pasted (ctrl-shift-v) into the console entirely. Or lines can be pasted individual (without the ( function install-packages { line, or the last three lines). ( function install-packages { ### Add the key, update the list, then install base R. 
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 sudo apt-get update sudo apt-get install r-base r-base-dev ### Git sudo apt-get install git-core git config --global user.email &quot;wibeasley@hotmail.com&quot; git config --global user.name &quot;Will Beasley&quot; git config --global credential.helper &#39;cache --timeout=3600000&#39; ### Ubuntu &amp; Bioconductor packages that are indirectly needed for packages and BBMC scripts # Supports the `locate` command in bash sudo apt-get install mlocate # The genefilter package is needed for &#39;modeest&#39; on CRAN. # No longer a modeest dependency: Rscript -e &#39;BiocManager::install(&quot;genefilter&quot;)&#39; ### CRAN packages that are also on the Ubuntu repositories # The &#39;xml2&#39; package; https://CRAN.R-project.org/package=xml2 sudo apt-get --yes install libxml2-dev r-cran-xml # The &#39;curl&#39; package, and others; https://CRAN.R-project.org/package=curl sudo apt-get --yes install libssl-dev libcurl4-openssl-dev # The &#39;udunits2&#39; package: https://cran.r-project.org/web/packages/udunits2/index.html sudo apt-get --yes install libudunits2-dev # The &#39;odbc&#39; package: https://github.com/r-dbi/odbc#linux---debian--ubuntu sudo apt-get --yes install unixodbc-dev tdsodbc odbc-postgresql libsqliteodbc # The &#39;rgl&#39; package; https://stackoverflow.com/a/39952771/1082435 sudo apt-get --yes install libcgal-dev libglu1-mesa-dev # The &#39;magick&#39; package; https://docs.ropensci.org/magick/articles/intro.html#build-from-source sudo apt-get --yes install &#39;libmagick++-dev&#39; # To compress vignettes when building a package; https://kalimu.github.io/post/checklist-for-r-package-submission-to-cran/ sudo apt-get --yes install qpdf # The &#39;pdftools&#39; and &#39;Rpoppler&#39; packages, which involve PDFs sudo apt-get --yes install libpoppler-cpp-dev libpoppler-glib-dev # The &#39;sys&#39; package sudo apt-get --yes install libapparmor-dev # 
The &#39;sf&#39; and other spatial packages: https://github.com/r-spatial/sf#ubuntu; https://github.com/r-spatial/sf/pull/1208 sudo apt-get --yes install libudunits2-dev libgdal-dev libgeos-dev libproj-dev libgeos++-dev # For Cairo package, a dependency of Shiny &amp; plotly; https://gykovacsblog.wordpress.com/2017/05/15/installing-cairo-for-r-on-ubuntu-17-04/ sudo apt-get --yes install libcairo2-dev # &#39;rJava&#39; and others; https://www.r-bloggers.com/installing-rjava-on-ubuntu/ sudo apt-get --yes install default-jre default-jdk sudo R CMD javareconf sudo apt-get --yes install r-cran-rjava # For reprex and sometimes ssh keys; https://github.com/tidyverse/reprex#installation sudo apt-get --yes install xclip # gifski -apparently the rust compiler is necessary sudo apt-get --yes install cargo # For databases sudo apt-get --yes install sqlite sqliteman sudo apt-get --yes install postgresql postgresql-contrib pgadmin3 # pandoc sudo apt-get --yes install pandoc # For checking packages. Avoid `/usr/bin/texi2dvi: not found` warning. sudo apt-get install texinfo } install-packages ) The version of pandoc from the Ubuntu repository may be delayed. To install the latest version, download the .deb file then install from the same directory. Finally, verify the version. sudo dpkg -i pandoc-* pandoc -v The Postman native app for Ubuntu is installed through snap, which is updated daily automatically. snap install postman 18.8 Retired Tools We previously installed the software below. Most have been replaced by software above thats either newer or more natural to use. GitLab SSL Certificate isnt software, but still needs to be configured. Talk to Will for the server URL and the *.cer file. Save the file in something like ~/keys/ca-bundle-gitlab.cer Associate the file with git config --global http.sslCAInfo ...path.../ca-bundle-gitlab.cer (but replace ...path...). MiKTeX is necessary only if youre using knitr or Sweave to produce LaTeX files (and not just markdown files). 
Its a huge, slow installation that can take an hour or two. {added Sept 2012} Pulse Secure is VPN client for OUHSC researchers. Its not required for the REDCap API, but its usually necessary to communicate with other campus data sources. msysGit allows RStudio to track changes and commit &amp; sync them to the GitHub server. Connect RStudio to GitHub repository. I moved this to optional (Oct 14, 2012) because the GitHub client (see above) does almost everything that the RStudio plugin does; and it does it a little better and a little more robust; and its installation hasnt given me problems. {added Oct 2012} Starting in the top right of RStudio, click: Project -&gt; New Project -&gt; Create Project from Version Control -&gt; Git {added Sept 2012} An example of a repository URL is https://github.com/OuhscBbmc/RedcapExamplesAndPatterns. Specify a location to save (a copy of) the project on your local computer. {added Sept 2012} CSVed is a lightweight program for viewing data files. It fits somewhere between a text editor and Excel. SourceTree is a rich client that has many more features than the GitHub client. I dont recommend it for beginners, since it has more ways to mess up things. But for developers, it nicely fills a spot in between the GitHub client and command-line operations. The branching visualization is really nice too. Unfortunately and ironically, it doesnt currently support Linux. {added Sept 2014}. git-cola is probably the best GUI for Git supported on Linux. Its available through the official Ubuntu repositories with apt-get (also see this). The branch visualization features are in a different, but related program, git dag. {added Sept 2014} GitHub for Eclipse is something I discourage for a beginner, and I strongly recommend you start with RStudio (and GitHub Client or the git capabilities within RStudio) for a few months before you even consider Eclipse. Its included in this list for the sake of completeness. 
When installing EGit plug-in, ignore eclipse site and check out this youtube video:http://www.youtube.com/watch?v=I7fbCE5nWPU. Color Oracle simulates the three most common types of color blindness. If you have produce a color graph in a report you develop, check it with Color Oracle (or ask someone else too). If its already installed, it takes less than 10 second to check it against all three types of color blindness. If its not installed, extra work may be necessary if Java isnt already installed. When you download the zip, extract the ColorOracle.exe program where you like. {added Sept 2012} Atom is a text editor, similar to Notepad++. Notepad++ appears more efficient opening large CSVs. Atom is better suited when editing a lot of files in a repository. For finding and replacing across a lot of files, it is superior to Notepad++ and RStudio; it permits regexes and has a great GUI preview of the potential replacements. Productivity is enhanced with the following Atom packages: Sublime Style Column Selection: Enable Sublime style Column Selection. Just hold alt while you select, or select using your middle mouse button. atom-language-r allows Atom to recognize files as R. This prevents spell checking indicators and enable syntax highlighting. When you need to browse through a lot of scattered R files quickly, Atoms tree panel (on the left) works well. An older alternative is language-r. language-csv: Adds syntax highlighting to CSV files. The highlighting is nice, and it automatically disables spell checking lines. atom-beautify: Beautify HTML, CSS, JavaScript, PHP, Python, Ruby, Java, C, C++, C#, Objective-C, CoffeeScript, TypeScript, Coldfusion, SQL, and more in Atom. atom-wrap-in-tag: wraps tag around selection; just select a word or phrase and hit Alt + Shift + w. minimap: A preview of the full source code (in the right margin). script: Run scripts based on file name, a selection of code, or by line number. 
git-plus: Do git things without the terminal (I dont think this is necessary anymore). The packages can be installed through Atom, or through the apm utility in the command line: apm install sublime-style-column-selection atom-language-r language-csv atom-beautify atom-wrap-in-tag minimap script And the following settings keep files consistent among developers. File | Settings | Editor | Tab Length: 2 (As opposed to 3 or 4, used in other conventions) File | Settings | Editor | Tab Type: soft (This inserts 2 spaces instead of a tab when Tab is pressed) As an alternative to the Gist, run the local R script install-packages.R (located in the utility/ directory) that lives in this repository. The workhorse of this function is OuhscMunge::package_janitor(). "],["tools.html", "Chapter 19 Considerations when Selecting Tools 19.1 General 19.2 Languages 19.3 R Packages 19.4 Database 19.5 Additional Resources", " Chapter 19 Considerations when Selecting Tools 19.1 General 19.1.1 The Components Goal While discussing the advantages and disadvantages of tools, a colleague once said, Tidyverse packages dont do anything that I cant already do in Base R, and sometimes it even requires more lines of code. Regardless if I agree, I feel these two points are irrelevant. Sometimes the advantage of a tool isnt to expand existing capabilities, but rather to facilitate development and maintenance for the same capability. Likewise, I care less about the line count, and more about the readability. Id prefer to maintain a 20-line chunk that is familiar and readable than a 10-line chunk with dense phrases and unfamiliar functions. The bottleneck for most of our projects is human time, not execution time. 19.1.2 Current Skillset of Team 19.1.3 Desired Future Skillset of Team 19.1.4 Skillset of Audience 19.2 Languages 19.3 R Packages When developing a codebase used by many people, choose packages both on their functionality, as well as their ease of installation and maintainability. 
For example, the rJava package is a powerful package that allows R package developers to leverage the widespread Java framework and many popular Java packages. However, installing Java and setting the appropriate path or registry settings can be error-prone, especially for non-developers. Therefore when considering between two functions with comparable capabilities (e.g., xlsx::read.xlsx() and readxl::read_excel()), avoid the package that requires a proper installation and configuration of Java and rJava. If the more intensive choice is required (say, you need a capability in xlsx missing from readxl), take: 20 minutes to start a markdown file that enumerates the packages direct and indirect dependencies that require manual configuration (e.g., rJava and Java), where to download them, and the typical installation steps. 5 minutes to create a GitHub Issue that (a) announces the new requirement, (b) describes who/what needs to install the requirement, (c) points to the markdown documentation, and (d) encourages teammates to post their problems, recommendations, and solutions in this issue. Weve found that a dedicated Issue helps communicate that the package dependency necessitates some intention and encourages people to assist other peoples troubleshooting. When something potentially useful is posted in the Issue, move it to the markdown document. Make sure the document and the issue hyperlink to each other. 15 minutes every year to re-evaluate the landscape. Confirm that the package is still actively maintained, and that no newer (and easily-maintained) package offers the desired capability.8 If a better fit now exists, evaluate if the effort to transition to the new package is worth the benefit. Be more willing to transition if the project is relatively green, and more development is upcoming. Be more willing to transition if the transition is relatively in-place, and will not require much modification of code or training of people. 
Finally, consider how much traffic passes through the dependency A brittle dependency will not be too disruptive if isolated in a downstream analysis file run by only one statistician. On the other hand, be very protective in the middle of the pipeline where typically most of your team runs. 19.4 Database Ease of installation &amp; maintenance Support from IT which database engine are they most comfortable supporting. Integration with LDAP, Active Directory, or Shibboleth. Warehouse vs transactional performance 19.5 Additional Resources (Colin Gillespie 2017), particularly the Package selection section. H References "],["team.html", "Chapter 20 Growing a Team 20.1 Recruiting 20.2 Training to Data Science 20.3 Bridges Outside the Team", " Chapter 20 Growing a Team 20.1 Recruiting 20.2 Training to Data Science Starting with a Researcher Starting with a Statistician Starting with a DBA Starting with a Software Developer 20.3 Bridges Outside the Team Monthly User Groups Annual Conferences "],["git.html", "A Git &amp; GitHub A.1 for Code Development A.2 for Collaboration A.3 for Stability A.4 for New Collaborators A.5 Steps for Contributing to Repo", " A Git &amp; GitHub A.1 for Code Development Jenny Bryan and Jim Hester have published a thorough description of using Git from a data scientists perspective (Happy Git and GitHub for the useR), and we recommend following their guidance. It is consistent with our approach, with a few exceptions noted below. A complementary resource is Team Geek, which has insightful advice for the human and collaborative aspects of version control. Other Resources Setting up a CI/CD Process on GitHub with Travis CI. Travis-CI blob from August 2019. A.2 for Collaboration Somewhat separate from its version control capabilities, GitHub provides built-in tools for coordinating projects across people and time. 
These tools revolve around GitHub Issues, which allow teammates to track issues assigned to them and others, and to search whether other teammates have encountered similar problems that they are facing now (e.g., the new computer cant install the rJava package). Theres nothing magical about GitHub issues, but if you dont use them, consider using a similar or more capable tool like those offered by Atlassian, Asana, Basecamp, and many others. Here are some tips from our experiences with projects involving between 2 and 10 statisticians working with an upcoming deadline. If you create an issue that describes a problem blocking your progress, include both the raw text (e.g., error: JAVA_HOME cannot be determined from the Registry) and possibly a screenshot. The text allows the problem to be more easily searched by people later; the screenshot usually provides extra context that allows others to understand the situation and help more quickly. Include enough broad context and enough specific details that teammates can quickly understand the problem. Ideally they can even run your code and debug it. Good recommendations can be found in the Stack Overflow posts, How to make a great R reproducible example and How do I ask a good question?. The issues dont need to be as thorough, because your teammates start with more context than a Stack Overflow reader. We typically include a description of the problem or fishy behavior. the exact error message (or a good description of the fishy behavior). a snippet of the 1-10 lines of code suspected of causing the problem. a link to the codes file (and ideally the line number, such as https://github.com/OuhscBbmc/REDCapR/blob/master/R/redcap-version.R#L40) so the reader can hop over to the entire file. references to similar GitHub Issues or Stack Overflow questions that could aid troubleshooting. A.3 for Stability Review Git commits closely No unintended functional difference (e.g., !match accidentally changed to match). 
No PHI snuck in (e.g., a patient ID used while isolating and debugging). The metadata format didnt change (e.g., Excel sometimes changes the string 010 to the number 10). See the appendix for a longer discussion about the problems that Excel typically introduces. A.4 for New Collaborators A.5 Steps for Contributing to Repo A.5.1 Regular Contributions A.5.1.1 Keep your dev branch fresh We recommend doing this at least every day you write code in a repo. Perhaps more frequently if a lot of developers are pushing code (e.g., right before a reporting deadline). Update master on your local machine (from the GitHub server) Merge master into your local dev branch Push your local dev branch to the GitHub server A.5.1.2 Make your code contributions available to other analysts At least every few days, push your changes to the master branch so teammates can benefit from your work. Especially if you are improving the pipeline code (e.g. Ellises or REDCap Arches) Make sure your dev branch is updated immediately before you create a Pull Request. Follow the steps above. Verify the merged code still works as expected. In other words, make sure that when your new code is blended with the newest master code, nothing breaks. Depending on the repo, these steps might include Build and Check the repo (assuming the repo is also a package). Run any code that verifies the basic functionality of the repo. (For example, our MIECHV team should run high-school-funnel.R and verify the assertions passed). Commit changes in your dev branch and push to the GitHub server. Create a Pull Request (otherwise known as a PR) and assign a reviewer. (For example, developers in the MIECHV team are paired together to review each others code.) The reviewer will pull your dev branch on to their local machine and run the same checks and verification (that you did on the 2nd step above). This duplicate effort helps verify that your code likely works for everyone on their own machines. 
The reviewer then accepts the PR and the master branch now contains your changes, which are available to teammates. {Transfer &amp; update the material from https://github.com/OuhscBbmc/BbmcResources/blob/master/instructions/github.md} "],["snippets.html", "B Snippets B.1 Reading External Data B.2 Grooming B.3 Identification B.4 Correspondence with Collaborators", " B Snippets B.1 Reading External Data B.1.1 Reading from Excel Background: Avoid Excel for the reasons previously discussed. But if there isnt another good option, be protective. readxl::read_excel() allows you to specify column types, but not column order. The names of col_types are ignored by readxl::read_excel(). To defend against roaming columns (e.g., the files changed over time), testit::assert() that the order is what you expect. Last Modified: 2019-12-12 by Will # ---- declare-globals --------------------------------------------------------- config &lt;- config::get() # cat(sprintf(&#39; `%s` = &quot;text&quot;,\\n&#39;, colnames(ds)), sep=&quot;&quot;) # &#39;text&#39; by default --then change where appropriate. col_types &lt;- c( `Med Rec Num` = &quot;text&quot;, `Admit Date` = &quot;date&quot;, `Tot Cash Pymt` = &quot;numeric&quot; ) # ---- load-data --------------------------------------------------------------- ds &lt;- readxl::read_excel( path = config$path_admission_charge, col_types = col_types # sheet = &quot;dont-use-sheets-if-possible&quot; ) testit::assert( &quot;The order of column names must match the expected list.&quot;, names(col_types) == colnames(ds) ) B.1.2 Removing Trailing Comma from Header Background: Occasionally a Meditech Extract will have an extra comma at the end of the 1st line. For each subsequent line, readr::read_csv() appropriately throws a new warning that it is missing a column. This warning flood can mask real problems. 
Explanation: This snippet (a) reads the csv as plain text, (b) removes the final comma, and (c) passes the plain text to readr::read_csv() to convert it into a data.frame. Instruction: Modify Dx50 Name to the name of the final (real) column. Real Example: truong-pharmacist-transition-1 (Accessible to only CDW members.) Last Modified: 2019-12-12 by Will # The next two lines remove the trailing comma at the end of the 1st line. raw_text &lt;- readr::read_file(path_in) raw_text &lt;- sub(&quot;^(.+Dx50 Name),&quot;, &quot;\\\\1&quot;, raw_text) ds &lt;- readr::read_csv(raw_text, col_types=col_types) B.1.3 Removing Trailing Comma from Header Background: When incoming data files are on the large side to comfortably accept with readr, we use vroom. The two packages are developed by the same group and might be combined in the future. Explanation: This snippet defines the col_types list with names to mimic our approach of using readr. There are some small differences with our readr approach: 1. col_types is a list instead of a readr::cols_only object. 1. The call to vroom::vroom() passes col_names = names(col_types) explicitly. 1. If the data file contains columns we dont need, we define them in col_types anyway; vroom needs to know the file structure if its missing a header row. Real Example: akande-medically-complex-1 (Accessible to only CDW members.) These files did not have a header of variable names; the first line of the file is the first data row. 
Last Modified: 2020-08-21 by Will # ---- declare-globals --------------------------------------------------------- config &lt;- config::get() col_types &lt;- list( sak = vroom::col_integer(), # &quot;system-assigned key&quot; aid_category_id = vroom::col_character(), age = vroom::col_integer(), service_date_first = vroom::col_date(&quot;%m/%d/%Y&quot;), service_date_lasst = vroom::col_date(&quot;%m/%d/%Y&quot;), claim_type = vroom::col_character(), provider_id = vroom::col_character(), provider_lat = vroom::col_double(), provider_long = vroom::col_double(), provider_zip = vroom::col_character(), cpt = vroom::col_integer(), revenue_code = vroom::col_integer(), icd_code = vroom::col_character(), icd_sequence = vroom::col_integer(), vocabulary_coarse_id = vroom::col_integer() ) # ---- load-data --------------------------------------------------------------- ds &lt;- vroom::vroom( file = config$path_ohca_patient, delim = &quot;\\t&quot;, col_names = names(col_types), col_types = col_types ) rm(col_types) B.2 Grooming B.2.1 Correct for misinterpreted two-digit year Background: Sometimes the Meditech dates are specified like 1/6/54 instead of 1/6/1954. readr::read_csv() has to choose if the year is supposed to be 1954 or 2054. A human can use context to guess a birth date is in the past (so it guesses 1954), but readr cant (so it guesses 2054). For avoid this and other problems, request dates in an ISO-8601 format. Explanation: Correct for this in a dplyr::mutate() clause; compare the date value against today. If the date is today or before, use it; if the day is in the future, subtract 100 years. Instruction: For future dates such as loan payments, the direction will flip. 
Last Modified: 2019-12-12 by Will ds %&gt;% dplyr::mutate( dob = dplyr::if_else(dob &lt;= Sys.Date(), dob, dob - lubridate::years(100)) ) B.3 Identification B.3.1 Generating tags Background: When you need to generate unique identification values for future people/clients/patients, as described in the style guide. Explanation: This snippet will create a 5-row csv with random 7-character tags to send to the research team collecting patients. The Instruction: Set pt_count, tag_length, path_out, and execute. Add and rename the columns to be more appropriate for your domain (e.g., change patient tag to store tag). Last Modified: 2019-12-30 by Will pt_count &lt;- 5L # The number of rows in the dataset. tag_length &lt;- 7L # The number of characters in each tag. path_out &lt;- &quot;data-private/derived/pt-pool.csv&quot; draw_tag &lt;- function (tag_length = 4L, urn = c(0:9, letters)) { paste(sample(urn, size = tag_length, replace = T), collapse = &quot;&quot;) } ds_pt_pool &lt;- tibble::tibble( pt_index = seq_len(pt_count), pt_tag = vapply(rep(tag_length, pt_count), draw_tag, character(1)), assigned = FALSE, name_last = &quot;--&quot;, name_first = &quot;--&quot; ) readr::write_csv(ds_pt_pool, path_out) The resulting dataset will look like this, but with different randomly-generated tags. # A tibble: 5 x 5 pt_index pt_tag assigned name_last name_first &lt;int&gt; &lt;chr&gt; &lt;lgl&gt; &lt;chr&gt; &lt;chr&gt; 1 1 seikyfr FALSE -- -- 2 2 voiix4l FALSE -- -- 3 3 wosn4w2 FALSE -- -- 4 4 jl0dg84 FALSE -- -- 5 5 r5ei5ph FALSE -- -- B.4 Correspondence with Collaborators B.4.1 Excel files Receiving and storing Excel files should almost always be avoided for the reasons explained in this letter. We receive extracts as Excel files frequently, and have the following request ready to email the person sending us Excel files. Adapt the bold values like 109.19 to your situation. If you are familiar with their tools, suggest an alternative for saving the file as a csv. 
Once presented with these Excel gotchas, almost everyone has an aha moment and recognizes the problem. Unfortunately, not everyone has flexible software and can adapt easily. [Start of the letter] Sorry to be tedious, but could you please resend the extract as a csv file? Please call me if you have questions. Excel is being too helpful with some of the values, and essentially corrupting them. For example, values like 109.19 is interpreted as a number, not a character code (e.g., see cell L14). Because of limitations of finite precision, this becomes 109.18999999999999773. We cant round it, because there are other values in this column that cannot be cast to numbers, such as V55.0. Furthermore, the Es in some codes are incorrectly interpreted as the exponent operator (e.g., 4E5 is converted to 400,000). Finally, values like 41.0 are being converted to a number and the trailing zero is dropped (so cells like 41 are not distinguishable from 41.0). Unfortunately the problems exist in the Excel file itself. When we import the columns as text, the values are already in their corrupted state. Please compress/zip the csv if the file is be too large to email. Weve found that an Excel file is typically 5-10 times larger than a compressed csv. As much as Excel interferes with our medical variables, were lucky. It has messed with other branches of science much worse. Genomics were using it far too late before they realized their mistakes. What happened? By default, Excel and other popular spreadsheet applications convert some gene symbols to dates and numbers. For example, instead of writing out Membrane-Associated Ring Finger (C3HC4) 1, E3 Ubiquitin Protein Ligase, researchers have dubbed the gene MARCH1. Excel converts this into a date03/01/2016, saybecause thats probably what the majority of spreadsheet users mean when they type it into a cell. Similarly, gene identifiers like 2310009E13 are converted to exponential numbers (2.31E+19). 
In both cases, the conversions strip out valuable information about the genes in question. [End of the letter] "],["presentations.html", "C Presentations C.1 CDW C.2 REDCap C.3 Reproducible Research &amp; Visualization C.4 Data Management C.5 GitHub C.6 Software C.7 Architectures C.8 Components", " C Presentations Here is a collection of presentations by the BBMC and friends that may help demonstrate concepts discussed in the previous chapters. C.1 CDW prairie-outpost-public: Documentation and starter files for OUHSCs Clinical Data Warehouse. OUHSC CDW C.2 REDCap REDCap Systems Integration. REDCap Con 2015, Portland, Oregon. Literate Programming Patterns and Practices with REDCap REDCap Con 2014, Park City, Utah. Interacting with the REDCap API using the REDCapR Package REDCap Con 2014, Park City, Utah. Optimizing Study Management using REDCap, R, and other software tools. SCUG 2013. C.3 Reproducible Research &amp; Visualization Building pipelines and dashboards for practitioners: Mobilizing knowledge with reproducible reporting. Displaying Health Data Colloquium 2018, University of Victoria. Interactive reports and webpages with R &amp; Shiny. SCUG 2015. Big data, big analysis: a collaborative framework for multistudy replication. Conventional of Canadian Psychological Association, Victoria BC, 2016. WATS: wrap-around time series: Code to accompany WATS Plot article, 2014. C.4 Data Management BBMC Validator: catch and communicate data errors. SCUG 2016. Text manipulation with Regular Expressions, Part 1 and Part 2. SCUG 2016. Time and Effort Data Synthesis. SCUG 2015. C.5 GitHub Scientific Collaboration with GitHub. OU Bioinformatics Breakfast Club 2015. C.6 Software REDCapR: Interaction Between R and REDCap. OuhscMunge: Data manipulation operations commonly used by the Biomedical and Behavioral Methodology Core within the Department of Pediatrics of the University of Oklahoma Health Sciences Center. codified: Produce standard/formalized demographics tables. 
usnavy billets: Optimally assigning naval officers to billets. C.7 Architectures Linear Pipeline of the R Analysis Skeleton . Many-to-many Pipeline of the R Analysis Skeleton . Immunization transfer . IALSA: A Collaborative Modeling Framework for Multi-study Replication . POPS: Automated daily screening eligibility for rare and understudied prescriptions. . C.8 Components Customizing display tables: using css with DT and kableExtra. SCUG 2018. yaml and expandable trees that selectively show subsets of hierarchy, 2017. "],["scratch-pad.html", "D Scratch Pad of Loose Ideas D.1 Chapters &amp; Sections to Form D.2 Practices D.3 Good Sites", " D Scratch Pad of Loose Ideas D.1 Chapters &amp; Sections to Form Tools to Consider tidyverse odbc ggplot2 use factors for explanatory variables when you want to keep the order consistent across graphs. (genevamarshall) automation on a remote server or VDI Theres always a chance that my machine is configured a little differently than yours, which may affect results. Will you glance at those results too? I forgot what this project is about, and I wouldnt be able to spot problems like you can. The S drive file and the tables dont seem to have any obvious problems public reports (and dashboards) when developing a report for a external audience (ie, people outside your immediate research team), choose one or two pals who are unfamiliar with your aims/methods as an impromptu focus group. Ask them what things need to be redesigned/reframed/reformated/further-explained. (genevamarshall) plots plot labels/axes variable names units of measurement (eg, proportion vs percentage on the y axis) documentation - bookdown Bookdown has worked well for us so far. Its basically independent markdown documents stored on a dedicated git repo. Then you click build in RStudio and it converts all the markdown files to static html files. 
Because GitHub is essentially serving as the backend, everyone can make changes to sections and we dont have to be too worried about Heres a version thats hosted publicly, but I tested that it can be hosted on our shared file server. (Its possible because the html files are so static.) If this is what you guys want for OUs collective CDW, please tell me: who you want to be able to edit the documents without review. Ill add them to the GitHub repo. who you want to be able to view the documents. Ill add them to a dedicate file server space. https://ouhscbbmc.github.io/data-science-practices-1/workstation.html#installation-required I was thinking that each individual database gets it own chapter. The BBMC has ~4 databases in this sense: a Centricity staging database, a GECB staging database, the central warehouse, and the (fledgling) downstream OMOP database. Then there are ~3 sections within each chapter: (a) a black-and-white description of the tables, columns, &amp; indexes (written mostly for consumers), (b) recommendations how to use each table (written mostly for consumers), and (c) a description of the ETL process (written mostly for developers &amp; admins). My proposal uses GitHub and Markdown because theyre so universal (no knowledge of R is required really you could write it with any text editor &amp; commit, and let someone else click build in RStudio on their machine). But Im very flexible on all this. Ill support &amp; contribute to any system that you guys feel will work well across the teams. developing packages R packages by Hadley Wickham http://mangothecat.github.io/goodpractice/ Cargo cult programming is a style of computer programming characterized by the ritual inclusion of code or program structures that serve no real purpose. (Wikipedia) Your team should decide which elements of a file prototype and repo prototype are best for you. D.2 Practices on.exit() should have add = TRUE (Wickham (2019), Exit handlers). 
D.3 Good Sites Posts on these sites are almost always worth your time reading. The frequently improve how you develop with the common components used in our data pipelines. Yihui Xie, created knitr and other important contributions to reproducible research. RStudio, in addition to their IDE, many of the packages used here were created by their developers. Explain xkcd because its good. Occasionally skim the titles on these sites and pick a few relevant to your interests. We think it helps keep you aware of developments in the field, so your skills continually grow and our approaches dont become stagnant. OReillys Data science ideas and resources Towards Data Science These books havent been referenced (yet), but have good guidance and could be worth your time skimming to see what is relevant. The Tidynomicon by Dhavide Aruliah &amp; Greg Wilson Efficient R programming by Colin Gillespie &amp; Robin Lovelace Mastering Software Development in R H References "],["example-dashboard.html", "E Example Dashboard E.1 Example E.2 Style Guide E.3 Architecture", " E Example Dashboard Communicating quantitative trends to a community with a quantitative phobia can be difficult. This appendix showcases a dashboard style that has evolved during the past few years of OSDH Home Visiting, where twelve local programs practitioners implemented their own intervention ideas tailored to their interests and community. Over 50 dashboards have been developed: a custom dashboard is developed for each programs cycle, and a three additional dashboards communicate the results of program-agnostic investigations. A style guide is an important tool when managing this many unique investigations For a program-specific dashboard, its more important to meet the needs of the individual PDSA than to conform to a guide. However, we aim to make the dashboards as consistent as possible for several reasons: Its less work for the practitioners. 
A familiar presentation will help the practitioners grow comfortable with their new cycles dashboard. Recall most will use at least five dashboards in only a few years. Its less work for the analysts/developers. Within a cycle, a consistent format (with relatively interchangeable features) means that one analyst can more easily contribute and trouble shoot a colleagues dashboard. The lessons weve learned (and mistakes weve made) can be applied to later dashboards. The quality should improve and the development should quicken. Just like our CQI grant encourages an HV program to learn from its history and to learn from others, we as analysts should too. As we work with the programs to design a PDSA, each one analyst will learn about the strengths and weaknesses of our current dashboard style, and propose improvements. E.1 Example A example dashboard that mimic the real CQI is available at https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html. The dashboard source code is available in the analysis/dashboard-1 directory of the R Analysis Skeleton repository; this repo contains the code and documents the entire pipeline leading up to this dashboard. Weve had success developing and distributing dashboards as self-contained html files. They are portable and dont have dependencies on local data files or remote databases, yet the JavaScript and CSS provide a modest amount of interactivity. The dashboards principal components are flexdashboard, plotly, ggplot2, and R Markdown. In this dashboard of synthetic data, a cognitive measure is tracked across 14 years in three home visiting counties. E.2 Style Guide This section describes a set of practices that the BBMC analysts have decided are best for the CQI dashboards used in our MIECHV evaluations. In a sense, this CQI dashboard guide supplements our overall style guide. The MIECHV CQI dashboards are based on RStudios flexdashboard package, which uses rmarkdown, JavaScript, and CSS. 
flexdashboard has a great website that should be read by anyone adapting this guide for their own CQI projects. E.2.1 Headline page The dashboards greeting should be a good blend of (a) orientating the user to the context and (b) being welcoming but not overwhelming. For the second PDSA cycle, try to have only one or two important and impactful graphs on the first page; specialized graphs have their own pages later. Left column: Text qualified with {.tabset} Notes tab: text that provides info about the dashboards dataset, such as Count of (a) models, (b) programs, (c) clients, and (d) observations Date range The specific program_codes. Even though a PDSA is focused on a specific program, ideally other programs are included so they have a feel for what others are doing. Right column: Headline Graph(s) optionally qualified with {.tabset}. Ideally starts with an overall graph, with no longitudinal component. Show data only from the program, not the overall model. E.2.2 Tables page The tables provide exactness, especially the exactness of (a) the actual y value and (b) the frequency of the longitudinal values. These tables make it easier to see if youre inadvertently plotting multiple values for the same month, or if some month is missing. In the future, we can add a Download as CSV button if anyone requests it. Another advantage of the tables is that all measures are visible in the same screen. A typical program-month table will have at least 6 columns: program_code, month, model, outcome measure, process measure, and disruptor measure. If this is difficult to do, then the upstream scribe probably isnt doing its job well. These tables should be almost untouched from the rds files created in the load-data chunk. Each tab should represent a different unit of analysis (e.g., a single row summarizing the completed visits for a program-month). Use all the tabs below that are appropriate for the PDSA. 
Go from biggest unit (e.g., model) to smallest unit (e.g., Provider-Week). Unnamed column qualified with {.tabset}. Model tab Program tab Program-Month tab Program-Week tab Provider-Week tab Spaghetti Annotation tab If your spaghetti plots use faint vertical lines to mark events (e.g., the start of a PDSA intervention), include the events here too. E.2.3 Graphs page The graphs plots should provide the user with a feel of the trends. One graph focuses on one measure, so ideally a max of three spaghetti plots. Ideally the change over time (for the PDSAs program) is compared to the other programs during the same period. If a PSDA has multiple Process Measures, give them separate tabs labeled Process Measure 1 &amp; Process Measure 2. Unnamed column qualified with {.tabset}. Outcome Measure tab Process Measure tab Disruptor Measure tab If a spaghetti plot depicts a proportion/percentage measure, then include a visual layer the count/denominator behind each proportion (instead of a separate spaghetti plot dedicated to the denominator). This may include one or more of the following: geom_point where presence/absence denotes a nonzero/zero denominator geom_point where size denotes denominators size. geom_text (in place of geom_point) that explicitly shows denominators size geom_text along the bottom axis that explicitly shows denominators size use spaghetti_2() located in display-1.R. (not yet developed.) Add hover text to each spaghetti. E.2.4 Marginal Graphs page The marginal histograms provide context. Single column, qualified with {.tabset}. Contains a marginal/univariate graph of all variables in the analysis. Marginal graph of outcome measure Marginal graph of process measure Marginal graph of disruptor measure Show data only from the program, not the overall model. Use histogram_2() located in display-1.R (this link is accessible only to Oklahomas MIECHV evaluation team). Add hover text to each histogram. 
If all datasets are the same unit of analysis (e.g., program-month), then dont use an H3 tab. Use (H3) tabs if you have marginals more than one level (e.g., visit date at program-month, visit date at program-week, visit date at provider-week). But avoid multiple levels, if possible; especially if program isnt fluent with a single level. histograms have a more specific y-axis. For example, Count of Months instead of Frequency E.2.5 Documentation page The documentation should be self-contained in the same html file, so its easier for the practitioner to quickly get the explanation and return to the trends. Sometimes its best to place an explanation/annotation right next to the relevant content, but other times its distracting. And its always more work to maintain the explanations if theyre spread-out across the interface. So lets try keeping almost everything under one or two tabs in the Documentation page. To help beyond that, lets try to reuse as many documentation tabs as possible. The first tab will be specific to the methodology and displays of the PDSA. The remaining tabs will reference common Rmd files; the content will automatically update when the dashboard is rendered next. Unnamed column qualified with {.tabset}. Explanation Current PDSA Explanation All CQI Dashboards Glossary Tips Config E.2.6 Miscellaneous Notes The hierarchy level in this outline indicates the HTML-heading level. Numbers are H1 (i.e., ======) that specify pages, roman numerals are H2 (i.e., ------) that specify columns, and letters are H3 (i.e., ###) that specify tabs. Cosmetics connote the type of dashboard. Specify using the theme or css yaml keywords in the Rmd header. Common measures: theme: simplex uses a red banner. 1st cycle PDSAs (i.e., initial cycle of MIECHV 3): theme: cosmo uses a blue banner. This default is used if no theme is specified. 2nd cycle PDSAs: theme: flatly uses a turquoise banner. 3rd cycle PDSAs: theme: journal uses a light red banner. 
4th cycle PDSAs (i.e., initial cycle of MIECHV 5): custom css with a purple banner (a public copy of this css is available). Instead of a theme, the below line (with four leading spaces, because the yaml entry is nested under output and flexdashboard::flex_dashboard) css: ../../common/style-cqi-cycle-4.css E.3 Architecture The dashboard is only one piece of a large workflow. The design and construction of this workflow are discussed in this book, which are highlighted below. . E.3.1 Data from External System E.3.2 Groomed Data in Warehouse E.3.3 Analysis-Ready Dataset Very little data manipulation should occur in the dashboard. The upstream scribe should produce an analysis-ready rds file. The dashboard should be concerned only with presenting the graphs, tables, summary text, and documentation. Include a common measure if the PDSA explicitly mentions it. Try to show measures only if theyre directly related to the PDSA. The PDSA dashboard will have less exposure to change (which makes it easier to maintain). If a program needs context for their measures, they can look at the common measure dashboard. "],["example-chapter.html", "F Example Chapter", " F Example Chapter This intro was copied from the 1st chapter of the example bookdown repo. Im keeping it temporarily for reference. You can label chapter and section titles using {#label} after them, e.g., we can reference the Intro Chapter. If you do not manually label them, there will be automatic labels anyway Figures and tables with captions will be placed in figure and table environments, respectively. par(mar = c(4, 4, .1, .1)) plot(pressure, type = &#39;b&#39;, pch = 19) Figure F.1: Here is a nice figure! Reference a figure by its code chunk label with the fig: prefix, e.g., see Figure F.1. Similarly, you can reference tables generated from knitr::kable(), e.g., see Table F.1. knitr::kable( head(iris, 20), caption = &#39;Here is a nice table!&#39;, booktabs = TRUE ) Table F.1: Here is a nice table! 
Sepal.Length Sepal.Width Petal.Length Petal.Width Species 5.1 3.5 1.4 0.2 setosa 4.9 3.0 1.4 0.2 setosa 4.7 3.2 1.3 0.2 setosa 4.6 3.1 1.5 0.2 setosa 5.0 3.6 1.4 0.2 setosa 5.4 3.9 1.7 0.4 setosa 4.6 3.4 1.4 0.3 setosa 5.0 3.4 1.5 0.2 setosa 4.4 2.9 1.4 0.2 setosa 4.9 3.1 1.5 0.1 setosa 5.4 3.7 1.5 0.2 setosa 4.8 3.4 1.6 0.2 setosa 4.8 3.0 1.4 0.1 setosa 4.3 3.0 1.1 0.1 setosa 5.8 4.0 1.2 0.2 setosa 5.7 4.4 1.5 0.4 setosa 5.4 3.9 1.3 0.4 setosa 5.1 3.5 1.4 0.3 setosa 5.7 3.8 1.7 0.3 setosa 5.1 3.8 1.5 0.3 setosa You can write citations, too. For example, we are using the bookdown package (Xie 2021) in this sample book, which was built on top of R Markdown and knitr (Xie 2015). H References "],["acknowledgements.html", "G Acknowledgements", " G Acknowledgements The authors thank all our colleagues for the discussions and experiences about data science that lead to this book. At OUHSC, this includes @adrose, @aggie-dbc, @ARPeters, @Ashley-Jorgensen, @athumann, @atreat1, @caston60, @chanukyalakamsani, @CWilliamsOUHSC, @DavidBard, @evoss1, @genevamarshall, @Maleeha, @man9472, @rmatkins, @sbohora, @thomasnwilson, @vimleshbavadiya, @waleboro, @YuiYamaoka, @yutiantang. Outside the OUHSC, this includes @andkov, @ben519, @cscherrer, @cmodzelewski, @jimquallen, @mhunter1, @probinso, @russelljonas, and @spopovych. `r if (knitr::is_html_output())  "],["references.html", "H References", " H References "]]
+[["index.html", "Collaborative Data Science Practices Chapter 1 Introduction", " Collaborative Data Science Practices Will Beasley 2021-06-08 Chapter 1 Introduction This collection of documents describe practices used by the OUHSC BBMC in our analytics projects. "],["coding.html", "Chapter 2 Coding Principles 2.1 Simplify 2.2 Defensive Style", " Chapter 2 Coding Principles 2.1 Simplify 2.1.1 Data Types Use the simplest data type reasonable. A simpler data type is less likely contain unintended values. As we have seen, a string variable called gender can simultaneously contain the values m, f, F, Female, MALE, 0, 1, 2, Latino, \"\", and NA. On the other hand, a boolean variable gender_male can be only FALSE, TRUE, and NA.1 SQLite does not have a dedicated datatype, so you must resort to storing it as 0, 1 and NULL values. Because a caller cant assume that an ostensible boolean SQLite variable contains only those three values, the variable should be checked.] Once you have cleaned a variable in your initial ETL files (like an Ellis), lock it down so you do not have to spend time in the downstream files verifying that no bad values have been introduced. As a small bonus, simpler data types are typically faster, consume less memory, and translate more cleanly across platforms. Within R, the preference for numeric-ish variables is logical/boolean/bit, integer, bit64::integer64, and numeric/double-precision floats. The preference for categorical variables is logical/boolean/bit, factor, and character. 2.1.2 Categorical Levels When a boolean variable would be too restrictive and a factor or character is required, choose the simplest representation. Where possible: Use only lower case (e.g., male instead of Male for the gender variable). avoid repeating the variable in the level (e.g., control instead of control condition for the condition variable). 2.1.3 Recoding Almost every project recodes many variables. Choose the simplest function possible. 
The functions at the top are much easier to read and harder to mess up. Leverage existing booleans: Suppose you have the logical variable gender_male (which can be only TRUE, FALSE, or NA). Writing gender_male == TRUE or gender_male == FALSE will evaluate to a boolean thats unnecessary because gender_male is already a boolean. Testing for TRUE: use the variable by itself (i.e., gender_male instead of gender_male == TRUE). Testing for FALSE: use !. Write !gender_male instead of gender_male == FALSE or gender_male != TRUE. dplyr::coalesce(): The function evaluates a single variable and replaces NA with values from another variable. A coalesce like visit_completed = dplyr::coalesce(visit_completed, FALSE) is much easier to read and not mess up than visit_completed = dplyr::if_else(!is.na(visit_completed), visit_completed, FALSE) dplyr::na_if() transforms a nonmissing value into an NA. Recoding missing values like birth_apgar = dplyr::na_if(birth_apgar, 99) is easier to read and not mess up than birth_apgar = dplyr::if_else(birth_apgar == 99, NA_real_, birth_apgar) &lt;= (or a similar comparison operator): Compare two quantities to output a boolean variable. dplyr::if_else(): The function evaluates a single boolean variable. The output branches to only three possibilities: condition is (a) true, (b) false, or (c) (optionally) NA. An advantage over &lt;= is that NA values can be specified directly. date_start &lt;- as.Date(&quot;2017-01-01&quot;) # If a missing month element needs to be handled explicitly. stage = dplyr::if_else(date_start &lt;= month, &quot;pre&quot;, &quot;post&quot;, missing = &quot;missing-month&quot;) # Otherwise a simple boolean output is sufficient. stage_post = (date_start &lt;= month) base::cut(): The function evaluations only a single numeric variable. Its range is cut into different segments/categories on the one-dimensional number line. The output branches to single discrete value (either a factor-level or an integer). 
dplyr::recode(): The function evaluates a single integer or character variable. The output branches to a single discrete value. lookup table: It feasible recode 6 levels of race directly in R. Its less feasible to recode 200 provider names. Specify the mapping in a csv, readr the csv to a data.frame, and left-join it. dplyr::case_when(): The function is the most complicated because it can evaluate multiple variables. Also, multiple cases can be true, but only the first output is returned. This water fall execution helps in complicated scenarios, but is overkill for most. 2.2 Defensive Style 2.2.1 Qualify functions Try to prepend each function with its package. Write dplyr::filter() instead of filter(). When two packages contain public functions with the same name, the package that was most recently called with library() takes precedent. When multiple R files are executed, the packages precedents may not be predictable. Specifying the package eliminates the ambiguity, while also making the code easier to follow. For this reason, we recommend that almost all R files contain a load-packages chunk. See the Google Style Guide for more about qualifying functions. Some exceptions exist, including: The sf package if youre using its objects with dplyr verbs. 2.2.2 Date Arithmetic Dont use the minus operator (i.e., -) to subtract dates. Instead use as.integer(difftime(stop, start, units=\"days\")). Its longer but protects from the scenario that start or stop are changed upstream to a datetime. In that case, stop - start equals the number of seconds between the two points, not the number of days. 2.2.3 Excluding Bad Cases Some variables are critical to the record, and if its missing, you dont want or trust any of its other values. For instance, a hospital visit record rarely useful if missing the patient ID. In these cases, prevent the record from passing through the ellis. In this example, well presume we cant trust a patient record if it lacks a clean date of birth (dob). 
Define the permissible range, in either the elliss declare-globals chunk, or in the config-file. (Well use the config file for this example.) Well exclude anyone born before 2000, or after tomorrow. Even though its illogical for someone in a retrospective record to be born tomorrow, consider bending a little for small errors. range_dob : !expr c(as.Date(&quot;2000-01-01&quot;), Sys.Date() + lubridate::days(1)) In the tweak-data chunk, use OuhscMunge::trim_date() to set the cell to NA if it falls outside an acceptable range. After dplyr::mutate(), call tidyr::drop_na() to exclude the entire record, regardless if (a) it was already NA, or (b) was trimmed to NA. ds &lt;- ds %&gt;% dplyr::mutate( dob = OuhscMunge::trim_date(dob, config$range_dob) ) %&gt;% tidyr::drop_na(dob) Near the end of the file, verify the variable for three reasons: (a) theres a chance that the code above isnt working as expected, (b) some later code later might have introduced bad values, and (c) it clearly documents to a reader that dob was included in this range at this stage of the pipeline. checkmate::assert_date(ds$dob, any.missing=F, lower=config$range_dob[1], upper=config$range_dob[2]) The equivalent of Rs logical data type is called a bit in SQL Server, and a boolean in Postgres and MySQL. 
"],["architecture.html", "Chapter 3 Architecture Principles 3.1 Encapsulation 3.2 Leverage team members strengths &amp; avoid weaknesses 3.3 Scales 3.4 Consistency", " Chapter 3 Architecture Principles 3.1 Encapsulation 3.2 Leverage team members strengths &amp; avoid weaknesses 3.2.1 Focused code files 3.2.2 Metadata for content experts 3.3 Scales 3.3.1 Single source &amp; single analysis 3.3.2 Multiple sources &amp; multiple analyses 3.4 Consistency 3.4.1 Across Files 3.4.2 Across Languages 3.4.3 Across Projects "],["file-prototype-r.html", "Chapter 4 Prototypical R File 4.1 Clear Memory 4.2 Load Sources 4.3 Load Packages 4.4 Declare Globals 4.5 Load Data 4.6 Tweak Data 4.7 (Unique Content) 4.8 Verify Values 4.9 Specify Output Columns 4.10 Save to Disk or Database 4.11 Additional Resources", " Chapter 4 Prototypical R File As stated in Consistency across Files, using a consistent file structure can (a) improve the quality of the code because the structure has been proven over time to facilitate good practices and (b) allow your intentions to be more clear to teammates because they are familiar with the order and intentions of the chunks. We use the term chunk for a section of code because it corresponds with knitr terminology (Xie 2015), and in many analysis files (as opposed to manipulation files), the chunk of our R file connects to a knitr Rmd file. 4.1 Clear Memory Before the initial chunk many of our files clear the memory of variables from previous run. This is important when developing and debugging because it prevents previous runs from contaminating subsequent runs. However it has little effect during production; well look at manipulation files separately from analysis files. Manipulation R files are sourced with the argument local=new.env(). The file is executed in a fresh environment, so there are no variables to clear. 
Analysis R files are typically called from an Rmd files knitr::read_chunk(), and code positioned above the first chunk is not called by knitr.2 However typically do not clear the memory in R files that are sourced in the same environment as the caller, as it will interfere with the callers variables. rm(list = ls(all.names = TRUE)) 4.2 Load Sources In the first true chunk, source any R files containing global variables and functions that the current file requires. For instance, when a team of statisticians is producing a large report containing many analysis files, we define many of the graphical elements in a single file. This sourced file defines common color palettes and graphical functions so the cosmetics are more uniform across analyses. We prefer not to have sourced files perform any real action, such as importing data or manipulating a file. One reason is because it is difficult to be consistent about the environmental variables when the sourced files functions are run. A second reason is that it more cognitively difficult to understand how the files are connected. When the sourced file contains only function definitions, these operations can be called at any time in the current file with much tighter control of which variables are modified. A bonus of the discipline of defining functions (instead of executing functions) is that the operations are typically more robust and generalizable. Keep the chunk even if no files are sourced. An empty chunk is instructive to readers trying to determine if any files are sourced. This applies recommendation applies to all the chunks discussed in this chapter. As always, your team should agree on its own set of standards. # ---- load-sources ------------------------------------------------------------ base::source(file=&quot;./analysis/common/display-1.R&quot;) # Load common graphing functions. 4.3 Load Packages The load-packages chunk declares required packages near the files beginning for three reasons. 
First, a reader scanning the file can quickly determine its dependencies when located in a single chunk. Second, if your machine is lacking a required package, it is best to know early3. Third, this style mimics a requirement of other languages (such as declaring headers at the top of a C++ file) and follows the tidyverse style guide. As discussed in the previous qualify all functions section, we recommend that functions are qualified with their package (e.g., foo::bar() instead of merely bar()). Consequently, the load-packages chunk calls requireNamespace() more frequently than library(). requireNamespace() verifies the package is available on the local machine, but does not load it into memory; library() verifies the package is available, and then loads it. requireNamespace() is not used in several scenarios. Core packages (e.g., base and stats) are loaded by R in most default installations. We avoid unnecessary calls like library(stats) because they distract from more important features. Obvious dependencies are not called by requireNamespace() or library() for similar reasons, especially if they are not called directly. For example tidyselect is not listed when tidyr is listed. The pipe function (declared in the magrittr package , i.e., %&gt;%) is attached with import::from(magrittr, \"%&gt;%\"). This frequently-used function called be called throughout the execution without qualification. Compared to manipulation files, our analysis files tend to use many functions in a few concentrated packages so conflicting function names are less common. Typical packages used in analysis are ggplot2 and lme4. The sourced files above may load their own packages (by calling library()). It is important that the library() calls in this file follow the load-sources chunk so that identically-named functions (in different packages) are called with the correct precedent. Otherwise identically-named functions will conflict in the namespace with hard-to-predict results. 
Read R Packages for more about library(), requireNamespace(), and their siblings, as well as the larger concepts such as attaching functions into the search path. Here are packages found in most of our manipulation files. Notice the lesser-known packages have a quick explanation; this helps maintainers decide if the declaration is still necessary. Also notice the packages distributed outside of CRAN (e.g., GitHub) have a quick commented line to help the user install or update the package. # ---- load-packages ----------------------------------------------------------- import::from(magrittr, &quot;%&gt;%&quot; ) requireNamespace(&quot;readr&quot; ) requireNamespace(&quot;tidyr&quot; ) requireNamespace(&quot;dplyr&quot; ) requireNamespace(&quot;config&quot; ) requireNamespace(&quot;checkmate&quot; ) # Asserts expected conditions requireNamespace(&quot;OuhscMunge&quot;) # remotes::install_github(repo=&quot;OuhscBbmc/OuhscMunge&quot;) 4.4 Declare Globals When values are repeatedly used within a file, consider dedicating a variable so its defined and set only once. This is also a good place for variables that are used only once, but whose value are central to the files mission. Typical variables in our declare-globals chunk include data file paths, data file variables, color palettes, and values in the config file. The config file can coordinate a static variable across multiple files. Centrally # ---- declare-globals --------------------------------------------------------- # Constant values that won&#39;t change. config &lt;- config::get() path_db &lt;- config$path_database # Execute to specify the column types. It might require some manual adjustment (eg doubles to integers). 
# OuhscMunge::readr_spec_aligned(config$path_subject_1_raw) col_types &lt;- readr::cols_only( subject_id = readr::col_integer(), county_id = readr::col_integer(), gender_id = readr::col_double(), race = readr::col_character(), ethnicity = readr::col_character() ) 4.5 Load Data All data ingested by this file occurs in this chunk. We like to think of each file as a linear pipe with a single point of input and single point of output. Although it is possible for a file to read data files on any line, we recommend avoiding this sprawl because it is more difficult for humans to understand. If the software developer is a deist watchmaker, the files fate has been sealed by the end of this chunk. This makes is easier for a human to reason to isolate problems as either existing with (a) the incoming data or (b) the calculations on that data. Ideally this chunk consumes data from either a plain-text csv or a database. Many capable R functions and packages ingest data. We prefer the tidyverse readr for reading conventional files; its younger cousin, vroom has some nice advantages when working with larger files and some forms of jagged rectangles4. Depending on the file format, good packages to consider are data.table, haven, readxl, openxlsx, arrow, jsonlite, fst, yaml, and rio. When used in an Ellis, this chunk likely consumes a flat file like a csv with data or metadata. When used in a Ferry, Arch, or Scribe, this chunk likely consumes a database table. When used in an Analysis file, this chunk likely consumes a database table or rds (i.e., a compressed R data file). In some large-scale scenarios, there may be a series of datasets that cannot be held in RAM simultaneously. Our first choice is to split the R file so each new file has only a subset of the datasets in other words, the R file probably was given too much responsibility. Occassionaly the multiple datasets need to be considered at once, so splitting the R file is not a option. 
In these scenarios, we prefer to upload all the datasets to a database, which is better manipulating datasets too large for RAM. An R solution may be to loosen the restriction that dataset enter the R file only during the load-data chunk. Once a dataset is processed and no longer needed, rm() removes it from RAM. Now another dataset can be read from a file and manipulated. loose scrap: the chunk reads all data (e.g., database table, networked CSV, local lookup table). After this chunk, no new data should be introduced. This is for the sake of reducing human cognition load. Everything below this chunk is derived from these first four chunks. 4.6 Tweak Data loose scrap: Its best to rename the dataset (a) in a single place and (b) early in the pipeline, so the bad variable are never referenced. # OuhscMunge::column_rename_headstart(ds) # Help write `dplyr::select()` call. ds &lt;- ds %&gt;% dplyr::select( # `dplyr::select()` drops columns not included. subject_id, county_id, gender_id, race, ethnicity ) %&gt;% dplyr::mutate( ) %&gt;% dplyr::arrange(subject_id) # %&gt;% # tibble::rowid_to_column(&quot;subject_id&quot;) # Add a unique index if necessary 4.7 (Unique Content) This section represents all the chunks between tweak-data and verify-values. These chunks contain most of of the files creativity and contribution. In a sense, the structure of the first and last chunks allow these middle chunks to focus on concepts instead of plumbing. For simple files like the ellis of a metadata file, may not even need anything here. But complex analysis files may have 200+ lines distributed across a dozen chunks. We recommend that you create dedicate a chunk to each conceptual stage. If one starts to contain more than ~20 lines, consider if a more granular organization would clarify the codes intent. 
4.8 Verify Values Running OuhscMunge::verify_value_headstart(ds) will # ---- verify-values ----------------------------------------------------------- # Sniff out problems # OuhscMunge::verify_value_headstart(ds) checkmate::assert_integer( ds$county_month_id , any.missing=F , lower=1, upper=3080 , unique=T) checkmate::assert_integer( ds$county_id , any.missing=F , lower=1, upper=77 ) checkmate::assert_date( ds$month , any.missing=F , lower=as.Date(&quot;2012-06-15&quot;), upper=Sys.Date()) checkmate::assert_character(ds$county_name , any.missing=F , pattern=&quot;^.{3,12}$&quot; ) checkmate::assert_integer( ds$region_id , any.missing=F , lower=1, upper=20 ) checkmate::assert_numeric( ds$fte , any.missing=F , lower=0, upper=40 ) checkmate::assert_logical( ds$fte_approximated , any.missing=F ) checkmate::assert_numeric( ds$fte_rolling_median , any.missing=T , lower=0, upper=40 ) county_month_combo &lt;- paste(ds$county_id, ds$month) checkmate::assert_character(county_month_combo, pattern =&quot;^\\\\d{1,2} \\\\d{4}-\\\\d{2}-\\\\d{2}$&quot;, any.missing=F, unique=T) 4.9 Specify Output Columns This chunk: verifies these variables exist before uploading, documents (to troubleshooting developers) these variables are a product of the file, and reorders the variables to match the expected structure. Variable order is especially important for the database engines/drivers that ignore the variable name, and use only the variable position. We use the term slim because typically this output has fewer variables than the full dataset processed by the file. If you doubt the variable will be needed downstream, leave it in the dplyr::select(), but commented out. If someone needs it in the future, theyll easily determine where it might come from, and then uncomment the line (and possibly modify the database table). Once you import a column into a warehouse that multiple people are using, it can be tough to remove without breaking their code. 
This chunk follows verify-values because sometimes you want to check the validity of variables that are not consumed downstream. These variables are not important themselves, but an illegal value may reveal a larger problem with the dataset. # Print colnames that `dplyr::select()` should contain below: # cat(paste0(&quot; &quot;, colnames(ds), collapse=&quot;,\\n&quot;)) # Define the subset of columns that will be needed in the analyses. # The fewer columns that are exported, the fewer things that can break downstream. ds_slim &lt;- ds %&gt;% # dplyr::slice(1:100) %&gt;% dplyr::select( subject_id, county_id, gender_id, race, ethnicity ) ds_slim 4.10 Save to Disk or Database 4.11 Additional Resources (Colin Gillespie 2017), particularly the Efficient input/output chapter. H References "],["file-prototype-sql.html", "Chapter 5 Prototypical SQL File 5.1 Choice of Database Engine 5.2 Ferry 5.3 Default Databases 5.4 Declare Values Databases 5.5 Recreate Table 5.6 Truncate Table 5.7 INSERT INTO 5.8 SELECT 5.9 FROM 5.10 WHERE 5.11 ORDER BY 5.12 Indexing", " Chapter 5 Prototypical SQL File New data scientists typically import entire tables from a database into R, and then merge, filter, and groom the data.frames. A more efficient approach is to submit sql that executes on the database and returns a more specialized dataset. This provides several advantages: A database will be much more efficient when filtering and joining tables than any programing language, such as R or Python. A well-designed database will have indexed columns and other optimizations that surpass R and Python capabilities. A database handles datasets that are thousands of times larger than what R and Python can accommodate in RAM. For large datasets, database engines persist the data on a hard drive (instead of just RAM) and are optimized to read the necessary information into RAM the moment before it is needed, and then return the processed back to disk before progressing to the next block of data. 
Frequently, only a portion of the table's rows and columns are ultimately needed by the analysis. Reducing the size of the dataset leaving the database has two benefits: less information travels across the network and R's and Python's limited memory space is conserved. In some scenarios, it is desirable to use the INSERT SQL command to transfer data within the database, so the data never travels across the network and never touches R or your local machine. For our large and complicated projects, the majority of data movement uses INSERT commands within SQL files. Among these scenarios, the analysis-focused projects use R to call the sequence of SQL files (see flow.R), while the database-focused projects use SSIS. In both cases, we try to write the SQL files to conform to similar standards and conventions. As stated in Consistency across Files (and in the previous chapter), using a consistent file structure can (a) improve the quality of the code because the structure has been proven over time to facilitate good practices and (b) allow your intentions to be more clear to teammates because they are familiar with the order and intentions of the chunks. 5.1 Choice of Database Engine The major relational database engines use roughly the same syntax, but they all have slight deviations and enhancements beyond the SQL standards. Most of our databases are hosted by SQL Server, since that is what OUHSC's campus seems most comfortable supporting. Consequently, this chapter uses SQL Server 2017+ syntax. But like most data science teams, we still need to consume other databases, such as Oracle and MySQL. Outside OUHSC projects, we tend to use PostgreSQL and Redshift. 5.2 Ferry This basic sql file moves data within a database to create a table named dx, which is contained in the ley_covid_1 schema of the cdw_staging database.
--use cdw_staging declare @start_date date = &#39;2020-02-01&#39;; -- sync with config.yml declare @stop_date date = dateadd(day, -1, cast(getdate() as date)); -- sync with config.yml DROP TABLE if exists ley_covid_1.dx; CREATE TABLE ley_covid_1.dx( dx_id int identity(1, 1) primary key, patient_id int not null, covid_confirmed bit not null, problem_date date null, icd10_code varchar(20) not null ); -- TRUNCATE TABLE ley_covid_1.dx; INSERT INTO ley_covid_1.dx SELECT pr.patient_id ,ss.covid_confirmed ,pr.invoice_date as problem_date ,pr.code as icd10_code -- into ley_covid_1.dx FROM cdw.star_1.fact_problem as pr inner join beasley_covid_1.ss_dx as ss on pr.code = ss.icd10_code WHERE pr.problem_date_start between @start_date and @stop_date and pr.patient_id is not null ORDER BY pr.patient_id, pr.problem_date_start desc CREATE INDEX ley_covid_1_dx_patient_id on ley_covid_1.dx (patient_id); CREATE INDEX ley_covid_1_dx_icd10_code on ley_covid_1.dx (icd10_code); 5.3 Default Databases We prefer not to specify the database of each table, and instead control it through the connection (such as the DSNs default database value). Nevertheless, its helpful to include the default database behind a comment for two reasons. First, it communicates to the default database to the human reader. Second, during debugging, the code can be highlighted in ADS/SSMS and executed with F5; this will mimic what happens when the file is run via automation with a DSN. --use cdw_staging 5.4 Declare Values Databases Similar to the Declare Globals chunk in a prototypical R file, values set at the top of the file are easy to read and modify. declare @start_date date = &#39;2020-02-01&#39;; -- sync with config.yml declare @stop_date date = dateadd(day, -1, cast(getdate() as date)); -- sync with config.yml 5.5 Recreate Table When batch-loading data, it is typically easiest drop and recreate a database table. 
In the snippet below, any table with the specific name is dropped/deleted from the database and replaced with a (possibly new) definition. We like to dedicate a line to each table column, with at least three elements per line: the name, the data type, and whether nulls are allowed. Many other features and keywords are available when designing tables. The ones we occasionally use are: primary key helps database optimization when later querying the table, and enforces uniqueness; for example, a patient table should not have any two rows with the same patient_id value. Primary keys must be nonmissing, so the not null keyword is redundant. unique is helpful when a table has additional columns that need to be unique (such as patient_ssn and patient_id). A more advanced scenario uses a clustered columnar table, which is incompatible with the primary key designation. identity(1, 1) creates a 1, 2, 3, ... sequence, which relieves the client of creating the sequence with something like row_number(). Note that when an identity column exists, the number of columns in the SELECT clause will be one fewer than the columns defined in CREATE TABLE. DROP TABLE if exists ley_covid_1.dx; CREATE TABLE ley_covid_1.dx( dx_id int identity(1, 1) primary key, patient_id int not null, covid_confirmed bit not null, problem_date date null, icd10_code varchar(20) not null ); To jump-start the creation of the table definition, we frequently use the INTO clause. This operation creates a new table, informed by the column properties of the source tables. Within ADS and SSMS, refresh the list of tables and select the new table; there will be an option to copy the CREATE TABLE statement (similar to the snippet above) and paste it into the sql file. The definition can then be modified, such as tightening from null to not null. -- into ley_covid_1.dx 5.6 Truncate Table In scenarios where the table definition is stable and the data is refreshed frequently (say, daily), consider TRUNCATE-ing the table.
When taking this approach, we prefer to keep the DROP and CREATE code in the file, but commented out. This saves development time in the future if the table definition needs to be modified. -- TRUNCATE TABLE ley_covid_1.dx; 5.7 INSERT INTO The INSERT INTO clause (when followed by a SELECT clause) simply moves data from the query into the specified table. The INSERT INTO clause transfers the columns in the exact order of the query. It does not try to match to the names of the destination table. An error will be thrown if the column types are mismatched (e.g., attempting to insert a character string into an integer value). Even worse, no error will be thrown if the mismatched columns have compatible types. This will occur if the table's columns are patient_id, weight_kg, and height_cm, but the query's columns are patient_id, height_cm, and weight_in. Not only will the weight and height be written to the incorrect columns, but the execution will not catch that the source is weight_in, but the destination is weight_kg. INSERT INTO ley_covid_1.dx 5.8 SELECT The SELECT clause specifies the desired columns. It can also rename columns and perform manipulations. We prefer to specify the aliased table of each column. If two source tables have the same column name, an error will be thrown regarding the ambiguity. Even if that's not a concern, we believe that explicitly specifying the source improves readability and reduces errors. SELECT pr.patient_id ,ss.covid_confirmed ,cast(pr.invoice_datetime as date) as problem_date ,pr.code as icd10_code 5.9 FROM FROM cdw.star_1.fact_problem as pr inner join beasley_covid_1.ss_dx as ss on pr.code = ss.icd10_code 5.10 WHERE The WHERE clause reduces the number of returned rows (as opposed to reducing the number of columns in the SELECT clause). Use the indentation level to communicate to the reader how the subclauses are combined.
This is especially important if it both AND and OR operators are used, since their order of operations can be confused easily. WHERE pr.problem_date_start between @start_date and @stop_date and pr.patient_id is not null 5.11 ORDER BY The ORDER BY clause simply specifies the order of the rows. Be default, a columns values will be in ascending order, but can be descending if desired. ORDER BY pr.patient_id, pr.problem_date_start desc 5.12 Indexing If the table is large or queried in a variety of ways, indexing the table can speed up performance dramatically. CREATE INDEX ley_covid_1_dx_patient_id on ley_covid_1.dx (patient_id); CREATE INDEX ley_covid_1_dx_icd10_code on ley_covid_1.dx (icd10_code); "],["repo-prototype.html", "Chapter 6 Prototypical Repository 6.1 Root 6.2 Analysis 6.3 Data Public 6.4 Data Unshared 6.5 Documentation 6.6 Manipulation 6.7 Stitched Output 6.8 Utility", " Chapter 6 Prototypical Repository https://github.com/wibeasley/RAnalysisSkeleton 6.1 Root 6.1.1 config.R The configuration file is simply a plain-text yaml file read by the config package. It is great when a value has to be coordinated across multiple files. Also see the discussion of how we use the config file for excluding bad data values and of how the config file relates to yaml, json, and xml. default: # To be processed by Ellis lanes path_subject_1_raw: &quot;data-public/raw/subject-1.csv&quot; path_mlm_1_raw: &quot;data-public/raw/mlm-1.csv&quot; # Central Database (produced by Ellis lanes). path_database: &quot;data-public/derived/db.sqlite3&quot; # Analysis-ready datasets (produced by scribes &amp; consumed by analyses). path_mlm_1_derived: &quot;data-public/derived/mlm-1.rds&quot; # Metadata path_annotation: &quot;data-public/metadata/cqi-annotation.csv&quot; # Logging errors and messages from automated execution. 
path_log_flow: !expr strftime(Sys.time(), &quot;data-unshared/log/flow-%Y-%m-%d--%H-%M-%S.log&quot;) # time_zone_local : &quot;America/Chicago&quot; # Force local time, in case remotely run. # ---- Validation Ranges &amp; Patterns ---- range_record_id : !expr c(1L, 999999L) range_dob : !expr c(as.Date(&quot;2010-01-01&quot;), Sys.Date() + lubridate::days(1)) range_datetime_entry : !expr c(as.POSIXct(&quot;2019-01-01&quot;, tz=&quot;America/Chicago&quot;), Sys.time()) max_age : 25 pattern_mrn : &quot;^E\\\\d{9}$&quot; # An &#39;E&#39;, followed by 9 digits. 6.1.2 flow.R The workflow of the repo is determined by flow.R. It calls (typically R and SQL) files in a specific order, while sending the log messages to a file. See automation mediators for more details. 6.1.3 README.md 6.1.4 *.Rproj The Rproj file stores project-wide settings used by the RStudio IDE, such how trailing whitespaces are handled. The files major benefit is that it sets the R sessions working directory, which facilitates good discipline about setting a constant location for all files in the repo. Although the plain-text file can be edited directly, we recommend using RStudios dialog box. There is good documentation about Rproj settings. If you are unsure, copy this file to the repos root directory and rename it to match the repo exactly. 6.2 Analysis 6.3 Data Public Raw Derived Metadata Database Original 6.4 Data Unshared 6.5 Documentation 6.6 Manipulation 6.7 Stitched Output 6.8 Utility "],["rest.html", "Chapter 7 Data at Rest 7.1 Data States 7.2 Data Containers 7.3 Storage Conventions", " Chapter 7 Data at Rest 7.1 Data States Raw Derived Project-wide File on Repo Project-wide File on Protected File Server User-specific File on Protected File Server Project-wide Database Original 7.2 Data Containers 7.2.1 csv When exchanging data between two different systems, the preferred format is frequently plain text, where each cell in a record is separated by a comma. 
This is commonly called a csv a comma separated value file. As opposed to proprietary formats like xlsx or sas7bdat, a csv file is easily opened and parsable by most statistical software, and even conventional text editors and GitHub. 7.2.2 rds 7.2.3 yaml, json, and xml yaml, json, and xml are three plain-text hierarchical formats commonly used when the data structure cannot be naturally represented by a rectangle or a set of rectangles (and therefore it is not a good fit for csv or rds). If you are unsure where to start with a nested dataset, see tidyrs Rectangling vignette. In the same way we advocate for the simplest recoding function that is adequate for the task, we prefer yaml over json, and json over xml. Yaml accommodates most, but not all our needs. Initially it may be tricky to correctly use whitespacing to specify the correct nesting structure in yaml, but once you are familar, the file is easy to read and edit, and the Git diffs can be quickly reviewed. The yaml package reads a yaml file, and returns a (nested) R list; it can also convert an R list into a yaml file. The config package wraps the yaml package to fill a common need: retrieving repository configuration information from a yaml file. We recommend using the config package when it fits. In some ways its functionality is a simplification of the yaml package, but it is an extension in other ways. For example, when a value follows !expr, R will evaluate the expression. We commonly specify the allowable ranges for variables in config.yml range_dob : !expr c(as.Date(&quot;2010-01-01&quot;), Sys.Date() + lubridate::days(1)) See the discussion of the config.yml in our prototypical repository, as well. 7.2.4 Arrow Apache Arrow is an open source specification that is developed to work with many languages such as R, Spark, Python, and many others. It accommodates nice rectangles where CSVs are used, and hierarchical nesting where json and xml are used. 
It is both an in-memory specification (which allows a Python process to directly access an R object), and an on-disk specification (which allows a Python process to read a saved R file). The file format is compressed, so it takes much less space to store on disk and less time to transfer over a network. Its downside is the file is not plain-text, but binary. That means the file is not readable and editable by as many programs, which hurts your project's portability. You wouldn't want to store most metadata files as arrow because then your collaborators couldn't easily help you map the values to qqq 7.2.5 SQLite 7.2.6 Central Enterprise database 7.2.7 Central REDCap database 7.2.8 Containers to avoid 7.2.8.1 Spreadsheets Try not to receive data in Excel files. We think Excel can be useful for light brainstorming and prototyping equations but it should not be trusted to transport serious information. Other spreadsheet software like LibreOffice Calc is less problematic in our experience, but still less desirable than the formats mentioned above. If you receive a csv and open it in a typical spreadsheet program, we strongly recommend that you do not save it, because of the potential for mangling values. After you close the spreadsheet, review the Git commits to verify no values were corrupted. See the appendix for a list of the ways your analyses can be undermined when receiving Excel files, as well as a template to correspond with your less-experienced colleagues who are sending your team Excel files. 7.2.8.2 Proprietary Proprietary formats like SAS's sas7bdat are less accessible to people without the current expensive software licenses. Therefore distributing proprietary file formats hurts reproducibility and decreases your project's impact. On the other hand, using proprietary formats may be advantageous when you need to conceal the project's failure.
We formerly distributed sas7bdat files to supplement (otherwise identical) csvs, in order to cater to the suprisingly large population of SAS users who were unfamiliar with proc import or the Google search engine. Recently we have distributed only the csvs, with example code for reading the file from SAS. 7.3 Storage Conventions 7.3.1 All Sources Across all file formats, these conventions usually work best. consistency across versions: use a script to produce the dataset, and inform the recipient if the datasets structure changes. Most of our processes are automated, and changes that are trivial to humans (e.g., yyyy-mm-dd to mm/dd-yy) will break the automation. The specificity in our automation is intentional. We install guards on our processes so that bad values do not pass. For instance, we may place bounds on the toddlers age at 12 and 36 months. We want our automation to break if the next dataset contains age values between 1 and 3 (years). Our downstream analysis (say, a regression model where age is a predictor variable) would produce misleading results if the shift between months and years went undetected. date format: specify as YYYY-MM-DD (ISO-8601) time format: specify as HH:MM or HH:MM:SS, preferably in 24-hour time. Use a leading zero from midnight to 9:59am, with a colon separating hours, minutes, and seconds (i.e., 09:59) patient names: separate the name_last, name_first, and name_middle as three distinct variables when possible. currency: represent money as an integer or floating-point variable. This representation is more easily parsable by software, and enables mathematical operations (like max() or mean()) to be performed directly. Avoid commas and symbols like $. If there is a possibility of ambiguity, indicate the denomination in the variable name (e.g., payment_dollars or payment_euros). 7.3.2 Text These conventions usually work best within plain-text formats. 
csv: comma separated values are the most common plain-text format, so they have better support than similar formats where cells are separated by tabs or semi-colons. However, if you are receiving a well-behaved file separated by these characters, be thankful and go with the flow. cells enclosed in quotes: a cell should be enclosed in double quotes, especially if it's a string/character variable. 7.3.3 Excel As discussed above, avoid Excel. When that is not possible, these conventions help reduce ambiguity and corrupted values. See the appendix for our preferred approach to reading Excel files. avoid multiple tabs/worksheets: Excel files containing multiple worksheets are more complicated to read with automation, and they produce opportunities for inconsistent variables across tabs/worksheets. save the cells as text: prevent Excel from attempting to save cells as dates or numbers. Admittedly, this is a last-ditch effort. If someone is using Excel to convert cells to text, the values are probably already corrupted. 7.3.4 Meditech patient identifier: mrn_meditech instead of mrn, MRN Rec#, or Med Rec#. account/admission identifier: account_number instead of mrn, Acct#, or Account#. patient's full name: name_full instead of Patient Name or Name. long/tall format: one row per dx per patient (up to 50 dxs) instead of 50 columns of dx per patient. Applies to diagnosis code &amp; description order date &amp; number procedure name &amp; number Meditech Idiosyncrasies: blood pressure: in most systems the bp_diastolic and bp_systolic values are stored in separate integer variables. In Meditech, they are stored in a single character variable, separated by a forward slash.
7.3.5 Databases When exchanging data between two different systems,  "],["patterns.html", "Chapter 8 Patterns 8.1 Ellis 8.2 Arch 8.3 Ferry 8.4 Scribe 8.5 Analysis 8.6 Presentation -Static 8.7 Presentation -Interactive 8.8 Metadata", " Chapter 8 Patterns 8.1 Ellis 8.1.1 Purpose To incorporate outside data source into your system safely. 8.1.2 Philosophy Without data immigration, all warehouses are useless. Embrace the power of fresh information in a way that is: repeatable when the data source is updated (and you have to refresh your warehouse) similar to other Ellis lanes (that are designed for other data sources) so you dont have to learn/remember an entirely new pattern. (Like Rubiks cube instructions.) 8.1.3 Guidelines Take small bites. Like all software development, dont tackle all the complexity the first time. Start by processing only the important columns before incorporating move. Use only the variables you need in the short-term, especially for new projects. As everyone knows, the variables from the upstream source can change. Dont spend effort writing code for variables you wont need for a few months/years; theyll likely change before you need them. After a row passes through the verify-values chunk, youre accountable for any failures it causes in your warehouse. All analysts know that external data is messy, so dont be surprised. Sometimes Ill spend an hour writing an Ellis for 6 columns. Narrowly define each Ellis lane. One code file should strive to (a) consume only one CSV and (b) produce only one table. Exceptions include: if multiple input files are related, and really belong together (e.g., one CSV per month, or one CSV per clinic). This scenario is pretty common. if the CSV should legitimately produce two different tables after munging. This happens infrequently, such as one warehouse table needs to be wide, and another long. 
8.1.4 Examples https://github.com/wibeasley/RAnalysisSkeleton/blob/master/manipulation/te-ellis.R https://github.com/wibeasley/RAnalysisSkeleton/blob/master/manipulation/ https://github.com/OuhscBbmc/usnavy-billets/blob/master/manipulation/survey-ellis.R 8.1.5 Elements Clear memory In scripting languages like R (unlike compiled languages like Java), its easy for old variables to hang around. Explicitly clear them before you run the file again. rm(list=ls(all=TRUE)) #Clear the memory of variables from previous run. This is not called by knitr, because it&#39;s above the first chunk. Load Sources In R, a source()d file is run to execute its code. We prefer that a sourced file only load variables (like function definitions), instead of do real operations like read a dataset or perform a calculation. There are many times that you want a function to be available to multiple files in a repo; there are two approaches we like. The first is collecting those common functions into a single file (and then sourcing it in the callers). The second is to make the repo a legitimate R package. The first approach is better suited for quick &amp; easy development. The second allows you to add documentation and unit tests. # ---- load-sources ------------------------------------------------------------ source(&quot;./manipulation/osdh/ellis/common-ellis.R&quot;) Load Packages This is another precaution necessary in a scripting language. Determine if the necessary packages are available on the machine. Avoiding attaching packages (with the library() function) when possible. Their functions dont need to be qualified (e.g., dplyr::intersect()) and could cause naming conflicts. Even if you can guarantee they dont conflict with packages now, packages could add new functions in the future that do conflict. 
# ---- load-packages ----------------------------------------------------------- # Attach these package(s) so their functions don&#39;t need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path library(magrittr , quietly=TRUE) library(DBI , quietly=TRUE) # Verify these packages are available on the machine, but their functions need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path requireNamespace(&quot;readr&quot; ) requireNamespace(&quot;tidyr&quot; ) requireNamespace(&quot;dplyr&quot; ) # Avoid attaching dplyr, b/c its function names conflict with a lot of packages (esp base, stats, and plyr). requireNamespace(&quot;testit&quot;) requireNamespace(&quot;checkmate&quot;) requireNamespace(&quot;OuhscMunge&quot;) # remotes::install_github(repo=&quot;OuhscBbmc/OuhscMunge&quot;) Declare Global Variables and Functions. This includes defining the expected column names and types of the data sources; use readr::cols_only() (as opposed to readr::cols()) to ignore any new columns that may be been added since the datasets last refresh. # ---- declare-globals --------------------------------------------------------- Load Data Source(s) See load-data chunk described in the prototypical file. # ---- load-data --------------------------------------------------------------- Tweak Data See tweak-data chunk described in the prototypical file. # ---- tweak-data -------------------------------------------------------------- Body of the Ellis Verify Specify Columns See specify-columns-to-upload chunk described in the prototypical file. # ---- specify-columns-to-upload ----------------------------------------------- Welcome into your warehouse. Until this chunk, nothing should be persisted. 
# ---- save-to-db -------------------------------------------------------------- # ---- save-to-disk ------------------------------------------------------------ 8.2 Arch 8.3 Ferry 8.4 Scribe 8.5 Analysis 8.6 Presentation -Static 8.7 Presentation -Interactive 8.8 Metadata Survey items can change across time (for justified and unjustified reasons). We prefer to dedicate a metadata csv to a single variable https://github.com/LiveOak/vasquez-mexican-census-1/issues/17#issuecomment-567254695 relationship_id code_2011 code_2016 relationship display_order description_2011 description_2016 1 1 1 Jefe(a) 1 Jefe(a) Jefe(a) 2 2 2 Esposo(a) o compañero(a) 2 Esposo(a) o compañero(a) Esposo(a) o compañero(a) 3 3 3 Hijo(a) 3 Hijo(a) Hijo(a) 4 4 4 Nieto(a) 4 Nieto(a) Nieto(a) 5 5 5 Yerno/nuera 5 Yerno/nuera Yerno/nuera 6 6 6 Hermano(a) 6 Hermano(a) Hermano(a) 7 7 NA Sobrino(a) 7 Sobrino(a) NA 8 8 NA Padre o madre 8 Padre o madre NA 9 9 NA Suegro(a) 9 Suegro(a) NA 10 10 NA Cuñado(a) 10 Cuñado(a) Cuñado(a) 11 11 7 Otros parientes 11 Otros parientes Otros parientes 12 12 8 No parientes 12 No parientes No parientes 13 13 9 Empleado(a) doméstico(a) 13 Empleado(a) doméstico(a) Empleado(a) doméstico(a) 99 99 NA No especificado 99 No especificado NA 8.8.1 Primary Rules for Mapping A few important rules are necessary to map concepts in this multidimensional space. each variable gets its own csv, such as relationship.csv (show above), education.csv, living-status.csv, or race.csv. Its easiest if this file name matches the variable. each variable also needs a unique integer that identifies the underlying level in the database, such as education_id, living_status_id, and relationship_id. each survey wave gets its own column within the csv, such as code_2011 and code_2016. each level within a variable-wave gets its own row, like Jefe, Esposo, and Hijo. 8.8.2 Secondary Rules for Mapping In this scenarios, the first three columns are critical (i.e., relationship_id, code_2011, code_2016). 
Yet these additional guidelines will help the plumbing and manipulation of lookup variables. each variable also needs a unique name that identifies the underlying level for humans, such as education, living_status, and relationship. This is the human label corresponding to relationship_id. It's easiest if this column name matches the variable. each survey wave gets its own column within the csv, such as description_2011 and description_2016. These are the human labels corresponding to variables like code_2011 and code_2016. each variable benefits from a unique display order value that will be used later in analyses. Categorical variables typically have some desired sequence in graph legends and tables; specify that order here. This helps define the factor levels in R or the pandas.Categorical levels in Python. Mappings are usually informed by outside documentation. For transparency and maintainability, clearly describe where the documentation can be found. One option is to include it in data-public/metadata/README.md. Another option is to include it at the bottom of the csv, preceded by a #, or some comment character that can keep the csv-parser from treating the notes like data it needs to squeeze into cells. Notes for this example are: # Notes,,,,,, # 2016 codes come from `documentation/2106/fd_endireh2016_dbf.pdf`, pages 14-15,,,,, # 2011 codes come from `documentation/2011/fd_endireh11.xls`, TSDem tab,,,,, sometimes a notes column helps humans keep things straight, especially researchers new to the field/project. In the example above, the notes value in the first row might be jefe means head, not boss.
"],["security.html", "Chapter 9 Security &amp; Private Data 9.1 Security Guidelines 9.2 Dataset-level Redaction 9.3 Security for Data at Rest 9.4 File-level permissions 9.5 Database permissions 9.6 Public &amp; Private Repositories", " Chapter 9 Security &amp; Private Data Overview {Include a few paragraphs that describe principles and mentality, and how the following sections contribute.} The reports dataset(s) are preferably stored in REDCap or SQL Server. Theyre absolutely not stored not on GitHub or the local machine. Avoid Microsoft Access, Excel, CSVs, or anything without user accounts. If the PHI must be stored as a loose file (eg, CSV), keep it on the encrypted file server. Any PHI on a fileserver should be stored in a directory controlled by a fairly restrictive Windows AD group. Only ~4 people on a project probably need access to those files, not all ~20 people on a project. There are many benefits of SQL Server over CSVs or Excel files . Its protected by Odyssey (not just the VPN). It provides auditing logs. It provides schemas to further partition authorization. Real databases arent accidentally emailed or copied to an unsecured location. Transfer PHI into REDCap &amp; SQL Server as early as possible (particularly the CSVs &amp; XLSXs we regularly receive from partners). Temporary and derivative datasets are stored in SQL Server, not as a CSV on the fileserver. 9.1 Security Guidelines If you encounter a decision thats not described by this chapters the security practices, follow these underlying concepts. And of course, consult other people. Principle of least privilege: expose as little as possible. Limit the number of team members. Limit the amount of data (consider rows &amp; columns). Obfuscate values and remove unnecessary PHI in derivative datasets. Redundant layers of protection. A single point of failure shouldnt be enough to breach PHI security. Simplicity when possible. Store data in only two houses (eg, REDCap &amp; SQL Server). 
Easier to identify &amp; manage than a bunch of PHI CSVs scattered across a dozen folders, with versions. Manipulate your data programmatically, not manually. Your Windows AD account controls everything, indirectly or directly: VPN, Odyssey, file server, SQL, REDCap, &amp; REDCap API. Lock out team members when possible. It's not that you don't trust them with a lot of unnecessary data, it's that you don't trust their ex-boyfriends and their coffee shop hackers. 9.2 Dataset-level Redaction Several multi-layered strategies exist to prevent exposing PHI. One approach is simply to reduce the information contained in each variable. Much of the information in a medical record is not useful for modeling or descriptive statistics, and therefore can be omitted from downstream datasets. The techniques include: Remove the variable: An empty bucket has nothing to leak. Decrease the resolution: Many times, a patient's year of birth is adequate for analysis, and including the month and day is an unnecessary risk. Hash and salt identifiers: use cryptographic-quality algorithms to transform an ID to a derived value. For example, 234 becomes 1432c1a399. The original value of 234 is not recoverable from 1432c1a399. But two rows with 1432c1a399 are still attributed to the same patient by the statistical model. 9.3 Security for Data at Rest The report's dataset(s) are preferably stored in REDCap or SQL Server. They're absolutely not stored on GitHub or the local machine. Avoid Microsoft Access, Excel, CSVs, or anything without user accounts. If the PHI must be stored as a loose file (eg, CSV), keep it on the encrypted file server. Any PHI on a fileserver should be stored in a directory controlled by a fairly restrictive Windows AD group. Only ~4 people on a project probably need access to those files, not all ~20 people on a project. There are many benefits of SQL Server over CSVs or Excel files. It's protected by Odyssey (not just the VPN). It provides auditing logs.
It provides schemas to further partition authorization. Real databases aren't accidentally emailed or copied to an unsecured location. Transfer PHI into REDCap &amp; SQL Server as early as possible (particularly the CSVs &amp; XLSXs we regularly receive from partners). Temporary and derivative datasets are stored in SQL Server, not as a CSV on the fileserver. Hash values when possible. For instance, when we determine families/networks of people, we use things like SSNs. But the algorithm that identifies the clusters doesn't need to know the actual SSN, just that two records have the same SSN. Something like a SHA-256 hash is good for this. The algorithm can operate on the hashed SSN just as effectively as the real SSN. However, the original SSN can't be determined from its hashed value. If the table is accidentally exposed to the public, no PHI is compromised. The following two files help the hashing &amp; salting process: HashUtility.R and CreateSalt.R. 9.4 File-level permissions 9.5 Database permissions 9.6 Public &amp; Private Repositories 9.6.1 Repo Rules A code repository should be private, and restricted to only the necessary project members. The repo should be controlled by an OUHSC organization, and not by an individual's private account. The .gitignore file prohibits common data file formats from being pushed/uploaded to the central repository. Examples: accdb, mdb, xlsx, csv, sas7bdat, rdata, RHistory. If you have a text file without PHI that must be on GitHub, create a new extension for it like *.PhiFree. Or you can include a specific exception to the .gitignore file, by adding an exclamation point in front of the file, such as !RecruitmentProductivity/RecruitingZones/ZipcodesToZone.csv. An example is included in the current repository's .gitignore file (https://github.com/OuhscBbmc/RedcapExamplesAndPatterns/blob/master/.gitignore). 9.6.2 Scrubbing GitHub history Occasionally files may be committed to your git repository that need to be removed completely. 
Not just from the current collections of files (i.e., the branch's head), but from the entire history of the repo. Scrubbing is typically required when (a) a sensitive file has been accidentally committed and pushed to GitHub, or (b) a huge file has bloated your repository and disrupted productivity. The two suitable scrubbing approaches both require the command line. The first is the git-filter-branch command within git, and the second is the BFG repo-cleaner. We use the second approach, which is recommended by GitHub; it requires 15 minutes to install and configure from scratch, but then is much easier to develop against, and executes much faster. The bash-centric steps below remove any files from the repo history called monster-data.csv from the bloated repository. If the file contains passwords, change them immediately. Delete monster-data.csv from your branch and push the commit to GitHub. Ask your collaborators to push any outstanding commits to GitHub and delete their local copy of the repo. Once scrubbing is complete, they will re-clone it. Download and install the most recent Java JRE from the Oracle site. Download the most recent jar file from the BFG site to the home directory. Clone a fresh copy of the repository in the user's home directory. The --mirror argument avoids downloading every file, and downloads only the bookkeeping details required for scrubbing. cd ~ git clone --mirror https://github.com/your-org/bloated.git Remove all files (in any directory) called monster-data.csv. java -jar bfg-*.jar --delete-files monster-data.csv bloated.git Reflog and garbage collect the repo. cd bloated.git git reflog expire --expire=now --all &amp;&amp; git gc --prune=now --aggressive Push your local changes to the GitHub server. git push Delete the bfg jar from the home directory. cd ~ rm bfg-*.jar Ask your collaborators to re-clone the repo to their local machine. 
It is important they restart with a fresh copy, so the once-scrubbed file is not reintroduced into the repo's history. If the file contains sensitive information, like passwords or PHI, ask GitHub to refresh the cache so the file's history isn't accessible through their website, even if the repo is private. 9.6.2.0.1 Resources BFG Repo-Cleaner site Additional BFG instructions GitHub Sensitive Data Removal Policy "],["automation.html", "Chapter 10 Automation &amp; Reproducibility 10.1 Mediator 10.2 Scheduling 10.3 Auxiliary Issues", " Chapter 10 Automation &amp; Reproducibility Automation is an important prerequisite of reproducibility. 10.1 Mediator A nontrivial project usually has multiple stages in its pipeline. Instead of a human deciding when to execute which piece, a single file should execute the pieces. The single file makes the project more portable, and also clearly documents the process. This single file is a special case of the mediator pattern, in the sense that it defines how the pieces relate to each other. 10.1.1 Flow File in R {Describe https://github.com/wibeasley/RAnalysisSkeleton/blob/master/flow.R.} See also the prototypical repo. 10.1.2 Makefile {Briefly describe this language, how it can be more efficient, and what additional obstacles it presents.} 10.1.3 SSIS {Describe SSIS package development.} 10.2 Scheduling 10.2.1 cron cron is the common choice when scheduling tasks on Linux. A plain text file specifies which file to run, and on what recurring schedule. A lot of helpful documentation and tutorials exist, as well as sites that help construct and validate your entries like crontab guru. 10.2.2 Task Scheduler Windows Task Scheduler is the common choice when scheduling tasks on Windows. Many of the GUI options are easy to specify, but three are error-prone, and must be specified carefully. They exist under Actions | Start a program. Program/script: is the absolute path to Rscript.exe. 
It needs to be updated every time you upgrade R (unless you're doing something tricky with the PATH environmental OS variable). Notice we are using the patched version of R. The entry should be enclosed in quotes. &quot;C:\\Program Files\\R\\R-3.6.2patched\\bin\\Rscript.exe&quot; Add arguments (optional): specifies the flow file to run. In this case, the repo butcher-hearing-screen-1 is in the Documents/cdw/ directory; the flow file is located in the repo's root directory, as discussed in the prototypical repo. The entry should be enclosed in quotes. &quot;C:\\Users\\wbeasley\\Documents\\cdw\\butcher-hearing-screen-1\\flow.R&quot; Start in (optional): sets the working directory. If not properly set, the relative paths of the files will not point to the correct locations. It should be identical to the entry above, but (a) does not include /flow.R and (b) does NOT contain quotes. C:\\Users\\wbeasley\\Documents\\cdw\\butcher-hearing-screen-1 Other options we typically specify are: Run whether the user is logged in or not. Run as the highest available version of Windows. Wake the computer to run this task is probably necessary if this is located on a normal desktop. It is not something we specify, because our tasks are located on a VM-based workstation that is never turned off. Following these instructions, you are required to enter your password every time you modify the task, and every time you update your password. If you are using network credentials, you probably should specify your account like domain/username. Be careful: when you modify a task and are prompted for a password, the GUI subtly alters the account entry to just username (instead of domain/username). Make sure you prepend the username with the domain, as you enter the password. 10.2.3 SQL Server Agent SQL Server Agent executes jobs on a specified schedule. It also naturally interfaces with SSIS packages deployed to the server, but can also execute other formats, like a plain sql file. 
An important distinction is that it runs as a service on the database server, as opposed to Task Scheduler, which runs as a service on the client machine. We prefer running jobs on the server when the job either: requires elevated/administrative privileges (for instance, to access sensitive data), would require a lot of network constraints when passing large amounts of data between the server and client, or feels like it is the server's responsibility, such as rebuilding a database index, or archiving server logs. 10.3 Auxiliary Issues The following subsections do not execute or schedule any code, but should be considered. 10.3.1 Sink Log Files {Describe how to sink output to a file that can be examined easily.} 10.3.2 Package Versions When a project runs repeatedly on a schedule without human intervention, errors can easily go undetected in simple systems. And when they are detected, the error messages may not be as clear as when you are running the procedure in RStudio. For these and other reasons, plan your strategy for maintaining the version of R and its packages. Here are some approaches, with different tradeoffs. For most conventional projects, we keep all packages up to date, and live with the occasional breaks. We stick to a practice of (a) run our daily workflow, (b) update the packages (and R &amp; RStudio if necessary), (c) rerun that same week's workflow, and finally (d) verify that the results from a &amp; c are the same. If something is different, we have a day to adapt the pipeline code to the breaking changes in the packages. Before updating a package, read the NEWS file for changes that are not backwards-compatible (commonly called breaking changes in the news file). If the changes to the pipeline code are too difficult to complete in a day, we can roll back to a previous version with remotes::install_version(). On the other side of the spectrum, you can meticulously specify the desired version of each R package. 
This approach reduces the chance of a new version of a package breaking existing pipeline code. We recommend this approach when uptime is very important. The most intuitive implementation is to install with explicit code in a file like utility/install-dependencies.R: remotes::install_version(&quot;dplyr&quot; , version = &quot;0.4.3&quot; ) remotes::install_version(&quot;ggplot2&quot; , version = &quot;2.0.0&quot; ) remotes::install_version(&quot;data.table&quot;, version = &quot;1.10.4&quot;) remotes::install_version(&quot;lubridate&quot; , version = &quot;1.6.0&quot; ) remotes::install_version(&quot;openxlsx&quot; , version = &quot;4.0.17&quot;) # ... package list continues ... Another implementation is to convert the repo to a package itself, and specify the versions in the DESCRIPTION file. Imports: dplyr (== 0.4.3 ) ggplot2 (== 2.0.0 ) data.table (== 1.10.4) lubridate (== 1.6.0 ) openxlsx (== 4.0.17) A downside is that it can be difficult to set up an identical machine in a few months. Sometimes these packages depend on packages that are incompatible with other package versions. For example, at one point, the current version of dplyr was 0.4.3. A few months later, the rlang package (which wasn't explicitly specified in the list of 42 packages) required at least version 0.8.0 of dplyr. The developer on the new machine needs to decide whether to upgrade dplyr (and test for breaking changes in the pipeline) or to install an older version of rlang. A second important downside is that this approach can lock all the user's projects to a specific outdated package version. We and others5 advocate this approach when your team is experienced with only R, and has a machine dedicated to an important line-of-business workflow. When uptime is important and your team is experienced with other languages like Java, Python, or C#, consider if those would be better suited. A compromise between these two previous approaches is the renv package - R Environmentals. 
It is a successor to packrat. It requires some learning and cognitive overhead. But this investment becomes very appealing if (a) you were running hourly predictions and downtime is a big deal, or (b) your machine contains multiple projects that require different versions of the same package (such as dplyr 0.4.3 and dplyr 0.8.0). Chris Modzelewski "],["scaling-up.html", "Chapter 11 Scaling Up 11.1 Data Storage 11.2 Data Processing", " Chapter 11 Scaling Up 11.1 Data Storage Local File vs Conventional Database vs Redshift Usage Cases 11.2 Data Processing R vs SQL R vs Spark "],["collaboration.html", "Chapter 12 Parallel Collaboration 12.1 Social Contract 12.2 Code Reviews 12.3 Remote 12.4 Additional Resources 12.5 Loose Notes", " Chapter 12 Parallel Collaboration 12.1 Social Contract Issues Organized Commits &amp; Coherent Diffs Branch &amp; Merge Strategy 12.2 Code Reviews Daily Reviews of PRs Periodic Reviews of Files 12.3 Remote Headset &amp; sharing screens 12.4 Additional Resources (Colin Gillespie 2017), particularly the Efficient collaboration chapter. (Brian Fitzpatrick 2012) 12.5 Loose Notes 12.5.1 GitHub Review your diffs before committing. Check for things like accidental deletions and debugging code that should be deleted (or at least commented out). Keep chatter to a minimum, especially on projects with 3+ people being notified of every issue post. When encountering a problem, Take as much ownership as reasonable. Don't merely report there's an error. If you can't figure it out, ask the question and describe it well. what low-level file &amp; line of code threw the error. how you have tried to solve it. If there's a questionable line/chunk of code, trace its origin. Not for the sake of pointing the finger at someone, but for the sake of understanding its origin and history. 12.5.2 Common Code This involves code/files that multiple people use, like the REDCap arches. Run the file before committing it. 
Run common downstream files too (e.g., if you make a change to the arch, also run the funnel). If an upstream variable name must change, alert people. Post a GitHub issue to announce it. Tell everyone, and search the repo (ctrl+shift+f in RStudio) to alert specific people who might be affected. H References "],["document.html", "Chapter 13 Documentation 13.1 Team-wide 13.2 Project-specific 13.3 Dataset Origin &amp; Structure 13.4 Issues &amp; Tasks 13.5 Flow Diagrams 13.6 Setting up new machine", " Chapter 13 Documentation 13.1 Team-wide 13.2 Project-specific 13.3 Dataset Origin &amp; Structure 13.4 Issues &amp; Tasks 13.4.1 GitHub Issue Template If you are going to open up a repo/package to the public, consider creating a template for GitHub Issues that's tailored to the repo's unique characteristics. Furthermore, invite feedback from your userbase to improve the template. Here is our appeal in REDCapR that produced the Unexpected Behavior issue template: @nutterb @haozhu233, @rparrish, @sybandrew, and any one else, if you have time, please look at the new issue template that is customized for REDCapR/redcapAPI. I'd appreciate any feedback that could improve the experience for someone encountering a problem. I'd like something to (a) make it easier for the user to provide useful information with less effort and (b) make it easier for us to help more accurately with fewer back-and-forths. And if the template happens to help the user identify and solve the problem without creating the issue then I think everyone is happier too. I think the issue should leverage the Troubleshooter that 10+ people have contributed to. It should help locate the problematic area more quickly. @haozhu233, it seems you've liked the template in kableExtra. REDCapR is different in the sense it's more difficult to provide a minimal &amp; self-contained example to reproduce the problem. But with your experience with so many users and issues, I'd love any advice. 
@nutterb, Id like this template to be helpful to redcapAPI too. There are only three quick find-and-replace occurrences of REDCapR -&gt; redcapAPI. And those were mostly to distinguish the R package from REDCap itself. 13.5 Flow Diagrams 13.6 Setting up new machine (example) "],["style.html", "Chapter 14 Style Guide 14.1 Readability 14.2 Datasets 14.3 Categorical Variables 14.4 Dates 14.5 Naming 14.6 Whitespace 14.7 Database 14.8 ggplot2", " Chapter 14 Style Guide Using a consistent style across your projects can increase the overhead as your data science team discusses options, decides on a good choice, and develops compliant code. But like in most themes in this document, the cost is worth the effort. Unforced code errors are reduced when code is consistent, because mistake-prone styles are more apparent. For the most part, our team follows the tidyverse style. Here are some additional conventions we attempt to follow. Many of these were inspired by (Francesco Balena 2005). 14.1 Readability 14.1.1 Number The word number is ambiguous, especially in data science. Try for these more specific terms: count: the number of discrete objects or events, such as visit_count, pt_count, dx_count. id: a value that uniquely identifies an entity that doesnt change over time, such as pt_id, clinic_id, client_id. index: a 1-based sequence thats typically temporary, but unique within the dataset. For instance, pt_index 195 in Tuesdays dataset is likely a different person than pt_index 195 on Wednesday. On any given day, there is only one value of 195. tag: it is persistent across time like id, but typically created by the analysts and sent to the research team. See the snippet in the appendix for an example. tally: a running count duration: a length of time. Specify the units if it is not self-evident. physical and statistical quantities like depth, length, mass, mean, and sum. 14.1.2 Abbreviations Try to avoid abbreviations. 
Different people tend to shorten words differently; this variability increases the chance that people reference the wrong variable. At the very least, it wastes time trying to remember if subject_number, subject_num, or subject_no was used. The Consistency section describes how this can reduce errors and increase efficiency. However, some terms are too long to reasonably use without shortening. We make some exceptions, such as the following scenarios: humans commonly use the term orally. For instance, people tend to say OR instead of operating room. your team has agreed on a set list of abbreviations. The list for our CDW team includes: appt (not apt), cdw, cpt, drg (stands for diagnosis-related group), dx, hx, icd, pt, and vr (vital records). When your team chooses terms (e.g., apt vs appt), try to use a standard vocabulary, such as MedTerms Medical Dictionary. 14.2 Datasets 14.2.1 Filtering Rows Removing dataset rows is an important operation that is a frequent source of sneaky errors. These practices have hopefully reduced our mistakes and improved maintainability. 14.2.1.1 Dropping rows with missing values tidyr::drop_na() drops rows with a missing value in a specific column. # Good ds %&gt;% tidyr::drop_na(dob) is cleaner to read and write than these two styles. In particular, its easy to forget/overlook a !. # Worse ds %&gt;% dplyr::filter(!is.na(dob)) # Worst ds[!is.na(ds$dob), ] 14.2.1.2 Mimic number line When ordering quantities, go smallest-to-largest as you type left-to-right. 14.2.1.3 Searchable verbs Youve probably asked in frustration, Where did all the rows go? I had 1,000 in the middle of the file, but now have only 782. Try to keep a consistent set of tools for filtering, so you can ctrl+f only a handful of terms, such as filter, drop_na, and summarize/summarise. Its more difficult to highlight the filtering step when using base Rs filtering style (e.g., ds &lt;- ds[4 &lt;= ds$count, ]). 
14.2.2 Dont attach As the Google Stylesheet says, The possibilities for creating errors when using attach() are numerous. 14.3 Categorical Variables There are lots of names for a categorical variable across the different disciplines (e.g., factor, categorical, ). 14.3.1 Explicit Missing Values Define a level like \"unknown\" so the data manipulation doesnt have to test for both is.na(x) and x==\"unknown\". The explicit labels also helps when included in a statistical procedure and coefficient table. 14.3.2 Granularity Sometimes it helps to represent the values differently, say a granular and a coarse way. We say cut7 or cut3 to denotes the number of levels; this is related to base::cut(). unknown and other are frequently levels, and they count toward the quantity. # Inside a dplyr::mutate() clause education_cut7 = dplyr::recode( education_cut7, &quot;No Highschool Degree / GED&quot; = &quot;no diploma&quot;, &quot;High School Degree / GED&quot; = &quot;diploma&quot;, &quot;Some College&quot; = &quot;some college&quot;, &quot;Associate&#39;s Degree&quot; = &quot;associate&quot;, &quot;Bachelor&#39;s Degree&quot; = &quot;bachelor&quot;, &quot;Post-graduate degree&quot; = &quot;post-grad&quot;, &quot;Unknown&quot; = &quot;unknown&quot;, .missing = &quot;unknown&quot;, ), education_cut3 = dplyr::recode( education_cut7, &quot;no diploma&quot; = &quot;no bachelor&quot;, &quot;diploma&quot; = &quot;no bachelor&quot;, &quot;some college&quot; = &quot;no bachelor&quot;, &quot;associate&quot; = &quot;no bachelor&quot;, &quot;bachelor&quot; = &quot;bachelor&quot;, &quot;post-grad&quot; = &quot;bachelor&quot;, &quot;unknown&quot; = &quot;unknown&quot;, ), education_cut7 = factor(education_cut7, levels=c( &quot;no diploma&quot;, &quot;diploma&quot;, &quot;some college&quot;, &quot;associate&quot;, &quot;bachelor&quot;, &quot;post-grad&quot;, &quot;unknown&quot; )), education_cut3 = factor(education_cut3, levels=c( &quot;no bachelor&quot;, &quot;bachelor&quot;, 
&quot;unknown&quot; )) 14.4 Dates yob is an integer, but mob and wob are dates. Typically months are collapsed to the 15th day and weeks are collapsed to Monday, which are the defaults of OuhscMunge::clump_month_date() and OuhscMunge::clump_week_date(). These help obfuscate the real value, if PHI is involved. Months are centered because the midpoint is usually a better representation of the months performance than the months initial day. birth_month_index can be values 1 through 12, while birth_month (or commonly mob) contains the year (e.g., 2014-07-15). Dont use the minus operator (i.e., -). See Defensive Date Arithmetic. 14.5 Naming 14.5.1 Variables This builds upon the tidyverse style guide for objects. 14.5.1.1 Characters Use lowercase letters, using underscores to separate words. Avoid uppercase letters and periods. 14.5.1.2 Lexicographical Sorting For variables including multiple nouns or adjectives, use lexicographical sorting. The bigger term goes first. # Good: parent_name_last parent_name_first parent_dob kid_name_last kid_name_first kid_dob # Bad: last_name_parent first_name_parent dob_parent last_name_kid first_name_kid dob_kid Large datasets with multiple questionnaires (each with multiple subsections) are much more manageable when the variables follow a lexicographical order. SELECT asq3_medical_problems_01 ,asq3_medical_problems_02 ,asq3_medical_problems_03 ,asq3_behavior_concerns_01 ,asq3_behavior_concerns_02 ,asq3_behavior_concerns_03 ,asq3_worry_01 ,asq3_worry_02 ,asq3_worry_03 ,wai_01_steps_beneficial ,wai_02_hv_useful ,wai_03_parent_likes_me ,wai_04_hv_doubts ,hri_01_client_input ,hri_02_problems_discussed ,hri_03_addressing_problems_clarity ,hri_04_goals_discussed FROM miechv.gpav_3 14.5.2 Files and Folders Naming files and their folders/directories follows the style of naming variables, with one small difference: separate words with dashes (i.e., -), not underscores (i.e., _). 
Infrequently, well use a dash if it helps identify a noun (that already contains an underscore). For instance, if theres a table called patient_demographics, we might call the files patient_demographics-truncate.sql and patient_demographics-insert.sql. Using lower case is important because some databases and operating systems are case-sensitive, and some are case-insensitive. To promote portability, keep everything lowercase. Again, file and folder names should contain only (a) lowercase letters, (b) digits, (c) dashes, and (d) an occasional underscore. Do not include spaces, uppercase letters, and especially punctuation, such as : or (. 14.5.3 Datasets data.frames are used in almost every analysis file, so we put extra effort into formulating conventions that are informative and consistent. Naming datasets follows the style of naming variables, with a few additional features. In the R world, dataset is typically a synonym of data.frame, a rectangular structure of rows and columns. It is the equivalent of a conventional database table. Note that dataset means a collection of tables in the .NET world, and a collection of (not-necessarily-rectangular) files in Dataverse.6 14.5.3.1 Prefix with ds_ and d_ Datasets are handled so differently than other variables that we find its easier to identify its type and scope. The prefix ds_ indicates the dataset is available to the entire file, while d_ indicates the scope is localized to a function. count_elements &lt;- function (d) { nrow(d) * ncol(d) } ds &lt;- mtcars count_elements(d = ds) 14.5.3.2 Express the grain The grain of a dataset describes what each row represents, which is a similar idea to the statisticians concept of unit of analysis. Essentially it is the most granular entity described. Many miscommunications and silly mistakes are avoided when your team is disciplined enough to define a tidy dataset with a clear grain. 
ds_student # One row per student ds_teacher # One row per teacher ds_course # One row per course ds_course_student # One row per student-course combination ds_pt # One row per patient ds_pt_visit # One row per patient-visit combination ds_visit # Same as above, since it&#39;s clear a visit is connected w/ a pt For more insight into grains, Ralph Kimball writes In debugging literally thousands of dimensional designs from my students over the years, I have found that the most frequent design error by far is not declaring the grain of the fact table at the beginning of the design process. If the grain isnt clearly defined, the whole design rests on quicksand. Discussions about candidate dimensions go around in circles, and rogue facts that introduce application errors sneak into the design.  I hope youve noticed some powerful effects from declaring the grain. First, you can visualize the dimensionality of the doctor bill line item very precisely, and you can therefore confidently examine your data sources, deciding whether or not a dimension can be attached to this data. For example, you probably would exclude treatment outcome from this example because most medical billing data doesnt tie to any notion of outcome. 14.5.3.3 Singular table names If you adopt the style that the tables name reflects the grain, this is a corollary. If the grain is singular like one row per client or one row per building, the name should be ds_client and ds_building (not ds_clients and ds_buildings). If these datasets are saved to a database, the tables are called client and building. Table names are plural when the grain is plural. If a record has fields like client_id, date_birth, date_graduation and date_death, I suggest calling the table client_milestones (because a single row contains three milestones). This Stack Overflow post presents a variety of opinions and justifications when adopting a singular or plural naming scheme. 
I think its acceptable if the R vectors follow a different style than R data.frames. For instance, a vector can have a plural name even though each element is singular (e.g., client_ids &lt;- c(10, 24, 25)). 14.5.3.4 Use ds when definition is clear Many times an ellis file handles only one incoming csv and outgoing dataset, and the grain is obvious, typically because the ellis filename clearly states the grain. 14.5.3.5 Use an adjective after the grain, if necessary If the same R file is manipulating two datasets with the same grain, qualify their differences after the grain, such as ds_client_all and ds_client_michigan. Adjectives commonly indicate that one dataset is a subset of another. An occasional limitation with our naming scheme is that it is difficult to distinguish the grain from the adjective. For instance, is the grain of ds_student_enroll either (a) every instance of a student enrollment (i.e., student and enroll both describe the grain) or (b) the subset of students who enrolled (i.e., student is the grain and enroll is the adjective)? Its not clear without examining the code, comments, or documentation. If someone has a proposed solution, we would love to hear it. So far, weve been reluctant to decorate the variable name more, such as ds_grain_client_adj_enroll. 14.5.3.6 Define the dataset when in doubt If its potentially unclear to a new reader, use a comment immediately before the datasets initial use. # `ds_client_enroll`: # grain: one row per client # subset: only clients who have successfully enrolled are included # source: the `client` database table, where `enroll_count` is 1+. ds_client_enroll &lt;- ... 14.5.4 Semantic sorting Put the biggest term on the left side of the variable. 14.6 Whitespace Although execution is rarely affected by whitespace in R and SQL files, be consistent and minimalistic. One benefit is that Git diffs wont show unnecessary churn. 
When a line of code lights up in a diff, its nice when it reflects a real change, and not something trivial like tabs were converted to spaces, or trailing spaces were added or deleted. Some of these guidelines are handled automatically by modern IDEs, if you configure the correct settings. Tabs should be replaced by spaces. Most modern IDEs have an option to do this for you automatically. (RStudio calls this Insert spaces for tabs.) Indentations should be replaced by a consistent number of spaces, depending on the file type. R: 2 spaces SQL: 2 spaces Python: 4 spaces Each file should end with a blank line. (RStudio calls this Ensure that source files end with newline.) Remove spaces and tabs at the end of lines. (RStudio calls this Strip trailing horizontal whitespace when saving.) 14.7 Database GitLabs data team has a good style guide for databases and sql thats fairly consistent with our style. Some important similarities and differences are Favor CTEs The name of the primary key should typically contain the table. In the employee table, the key should be employee_id, not id. 14.8 ggplot2 The expressiveness of ggplot2 allows someone to quickly develop precise scientific graphics. One graph can be specified in many equivalent styles, which increases the opportunity for confusion. We formalized much of this style while writing a textbook for introductory statistics (Lise DeShea (2015)); the 200+ graphs and their code are publicly available. There are a few additional ggplot2 tips in the tidyverse style guide. 14.8.1 Order of commands ggplot2 is essentially a collection of functions combined with the + operator. Publication graphs commonly require at least 20 functions, which means the functions can sometimes be redundant or step on one another. The family of functions should follow a consistent order ideally starting with the more important structural functions and ending with the cosmetic functions. 
Our preference is: ggplot() is the primary function to specify the default dataset and aesthetic mappings. Many arguments can be passed to aes(), and we prefer to follow an order consistent with the scale_*() order below. geom_*() and annotate() create the geometric elements that represent the data. Unlike most categories in this list, the order matters. Geoms specified first are drawn first, and therefore can be obscured by subsequent geoms. scale_*() describes how a dimension of data (specified in aes()) is translated into a visual element. We specify the dimensions in descending order of (typical) importance: x, y, group, color, fill, size, radius, alpha, shape, linetype. coord_*() facet_*() and label_*() guides() theme() (call the big themes like theme_minimal() before overriding the details like theme(panel.grid = element_line(color = \"gray\"))) labs() 14.8.2 Gotchas Here are some common mistakes we see not-so-infrequently (even sometimes in our own code). 14.8.2.1 Zooming Call coord_*() to restrict the plotted x/y values, not scale_*() or lims()/xlim()/ylim(). coord_*() zooms in on the axes, so extreme values essentially fall off the page; in contrast, the latter three functions essentially remove the values from the dataset. The distinction does not matter for a simple bivariate scatterplot, but likely will mislead you and the viewer in two common scenarios. First, a call to geom_smooth() (e.g., that overlays a loess regression curve) ignores the extreme values entirely; consequently the summary location will be misplaced and its standard errors too tight. Second, when a line graph or spaghetti plot contains an extreme value, it is sometimes desirable to zoom in on the primary area of activity; when calling coord_*(), the trend line will leave and return to the plotting panel (which implies points exist which do not fit the page), yet when calling the others, the trend line will appear interrupted, as if the extreme point is a missing value. 
14.8.2.2 Seed When jittering, set the seed in the declare-globals chunk so that rerunning the report wont create a (slightly) different png. The insignificantly different pngs will consume extra space in the Git repository. Also, the GitHub diff will show the difference between png versions, which requires extra cognitive load to determine if the difference is due solely to jittering, or if something really changed in the analysis. H References "],["publication.html", "Chapter 15 Publishing Results 15.1 To Other Analysts 15.2 To Researchers &amp; Content Experts 15.3 To Technical-Phobic Audiences", " Chapter 15 Publishing Results 15.1 To Other Analysts 15.2 To Researchers &amp; Content Experts 15.3 To Technical-Phobic Audiences "],["testing-and-validation.html", "Chapter 16 Testing, Validation, &amp; Defensive Programming 16.1 Testing Functions 16.2 Defensive Programming 16.3 Validator", " Chapter 16 Testing, Validation, &amp; Defensive Programming 16.1 Testing Functions 16.2 Defensive Programming Throwing errors 16.3 Validator Benefits for Analysts Benefits for Data Collectors "],["troubleshooting.html", "Chapter 17 Troubleshooting and Debugging 17.1 Finding Help 17.2 Debugging", " Chapter 17 Troubleshooting and Debugging 17.1 Finding Help Within your group (eg, Thomas and REDCap questions) Within your university (eg, SCUG) Outside (eg, Stack Overflow; GitHub issues) 17.2 Debugging traceback(), browser(), etc "],["workstation.html", "Chapter 18 Workstation 18.1 Required Installation 18.2 Recommended Installation 18.3 Optional Installation 18.4 Asset Locations 18.5 Administrator Installation 18.6 Installation Troubleshooting 18.7 Ubuntu Installation 18.8 Retired Tools", " Chapter 18 Workstation We believe it is important to keep software updated and consistent across workstations in your project. This material was originally posted at https://github.com/OuhscBbmc/RedcapExamplesAndPatterns/blob/master/DocumentationGlobal/ResourcesInstallation.md. 
It should help establish our tools on a new development computer. 18.1 Required Installation The installation order matters. 18.1.1 R R is the centerpiece of the analysis. Every few months, youll need to download the most recent version. {added Sept 2012} 18.1.2 RStudio RStudio Desktop is the IDE (integrated development environment) that youll use to interact with R, GitHub, Markdown, and LaTeX. Updates can be checked easily through the menus Help -&gt; Check for updates. 18.1.3 Installing R Packages Dozens of R Packages will need to be installed. Choose between one of the two related scripts. It will install from our list of packages that our data analysts typically need. The script installs a package only if its not already installed; also an existing package is updated if a newer version is available. Create a new personal library if it prompts you. It takes at least fifteen minutes, so start it before you go to lunch. The list of packages will evolve over time, so please help keep the list updated. To install our frequently-used packages, run the following snippet. The first line installs an important package. The second line calls the online Gist, which defines the package_janitor_remote() function. The final line calls the function (and passes a specific CSV of packages)7. if (!base::requireNamespace(&quot;devtools&quot;)) utils::install.packages(&quot;devtools&quot;) devtools::source_gist(&quot;2c5e7459b88ec28b9e8fa0c695b15ee3&quot;, filename=&quot;package-janitor-bbmc.R&quot;) package_janitor_remote( &quot;https://raw.githubusercontent.com/OuhscBbmc/RedcapExamplesAndPatterns/master/utility/package-dependency-list.csv&quot; ) Some of our projects require specialized packages that are not typically used. In these cases, we will develop the git repo as an R package that includes a proper DESCRIPTION file. See RAnalysisSkeleton for an example. 
When the project is opened in RStudio, update_packages_addin() in OuhscMunge will find the DESCRIPTION file and install the package dependencies. if( !base::requireNamespace(&quot;remotes&quot; ) ) utils::install.packages(&quot;remotes&quot;) if( !base::requireNamespace(&quot;OuhscMunge&quot;) ) remotes::install_github(&quot;OuhscBbmc/OuhscMunge&quot;) OuhscMunge::update_packages_addin() 18.1.4 Updating R Packages Several R packages will need to be updated every few weeks. Unless you have been told not to (because it would break something -this is rare), periodically update the packages by executing the following code update.packages(checkBuilt=TRUE). 18.1.5 GitHub GitHub registration is necessary to push modified files to the repository. First, register a free user account, then tell the repository owner your exact username, and they will add you as a collaborator (e.g., to https://github.com/OuhscBbmc/RedcapExamplesAndPatterns). 18.1.6 GitHub Desktop GitHub Desktop does the basic tasks a little easier than the git features built into RStudio. This client is available for Windows and macOS. (Occasionally, someone might need to use git from the command line to fix problems, but this is not required to start.) 18.1.7 R Tools R Tools for Windows is necessary to build some packages in development hosted on GitHub. If running Linux, the components of R Tools are likely already installed on your machine. {added Feb 2017} 18.2 Recommended Installation The installation order does not matter. 18.2.1 ODBC Driver ODBC Driver for SQL Server is for connecting to the token server, if your institution is using one. As of this writing, version 17 is the most recent driver version. See if a new one exists. {updated Apr 2018} 18.2.2 Notepad++ Notepad++ is a text editor that allows you to look at the raw text files, such as code and CSVs. For CSVs and other data files, it is helpful when troubleshooting (instead of looking at the file through Excel, which masks &amp; causes some issues). 
{added Sept 2012} 18.2.3 Azure Data Studio Azure Data Studio (ADS) is now recommended by Microsoft and others for analysts (and some other roles) ahead of SQL Server Management Studio. Note: here are some non-default changes that facilitate our workflow. Settings | Text Editor | Tab Size: 2 {\"editor.tabSize\": 2} Settings | Text Editor | Detect Indentation: uncheck {\"editor.detectIndentation\": false} Settings | Text Editor | Insert Final Newlines: check {\"files.insertFinalNewline\": true} Settings | Text Editor | Trim Final Newlines: check {\"files.trimFinalNewlines\": true} Settings | Text Editor | Trim Trailing Whitespace: check {\"files.trimTrailingWhitespace\": true} Data | Sql | Show Connection Info In Title: uncheck {\"sql.showConnectionInfoInTitle\": false} Data | Sql | Copy Include Headers: check {\"sql.copyIncludeHeaders\": true} { &quot;workbench.enablePreviewFeatures&quot;: true, &quot;workbench.colorTheme&quot;: &quot;Default Dark Azure Data Studio&quot;, &quot;editor.tabSize&quot;: 2, &quot;editor.detectIndentation&quot;: false, &quot;files.insertFinalNewline&quot;: true, &quot;files.trimFinalNewlines&quot;: true, &quot;files.trimTrailingWhitespace&quot;: true, &quot;queryEditor.showConnectionInfoInTitle&quot;: false, &quot;queryEditor.results.copyIncludeHeaders&quot;: true } 18.2.4 Visual Studio Code Visual Studio Code is an extensible text editor that runs on Windows and Linux, similar to Atom (described above). Its much lighter than the full Visual Studio. Like Atom, it supports browsing through the directory structure, replacing across files, interaction with git, and previewing markdown. Currently, it supports searching CSVs better than Atom. Productivity is enhanced with the following extensions: {added Dec 2018} Excel Viewer isnt a good name, but Ive liked the capability. It displays CSVs and other files in a grid. {added Dec 2018} Rainbow CSV color codes the columns, but still allows you to see and edit the raw plain-text file. 
{added Dec 2018} SQL Server allows you to execute against a database, and view/copy/save the grid results. It doesnt replicate all SSMS features, but is nice as you are scanning through files. {added Dec 2018} Code Spell Checker produces green squiggly lines under words not in its dictionary. You can add words to your user dictionary, or a project dictionary. Markdown All in One has some useful markdown capabilities, such as converting the file to html. Markdown PDF has some useful markdown capabilities, such as converting the file to pdf. markdownlint has linting and style checking. These extensions can be installed by command line. code --list-extensions code --install-extension GrapeCity.gc-excelviewer code --install-extension mechatroner.rainbow-csv code --install-extension ms-mssql.mssql code --install-extension streetsidesoftware.code-spell-checker code --install-extension yzhang.markdown-all-in-one code --install-extension yzane.markdown-pdf code --install-extension DavidAnson.vscode-markdownlint Note: here are some non-default changes that facilitate our workflow. Either copy this configuration into settings.json, or manually specify the options with the settings editor. 
{ &quot;diffEditor.ignoreTrimWhitespace&quot;: false, &quot;diffEditor.maxComputationTime&quot;: 0, &quot;editor.acceptSuggestionOnEnter&quot;: &quot;off&quot;, &quot;editor.renderWhitespace&quot;: &quot;all&quot;, &quot;explorer.confirmDragAndDrop&quot;: false, &quot;files.associations&quot;: { &quot;*.Rmd&quot;: &quot;markdown&quot; }, &quot;files.trimFinalNewlines&quot;: true, &quot;files.trimTrailingWhitespace&quot;: true, &quot;git.autofetch&quot;: true, &quot;git.confirmSync&quot;: false, &quot;window.zoomLevel&quot;: 2, &quot;markdown.extension.orderedList.autoRenumber&quot;: false, &quot;markdown.extension.orderedList.marker&quot;: &quot;one&quot;, &quot;markdownlint.config&quot;: { &quot;MD003&quot;: { &quot;style&quot;: &quot;setext_with_atx&quot; }, &quot;MD007&quot;: { &quot;indent&quot;: 2 }, &quot;MD022&quot;: { &quot;lines_above&quot;: 1, &quot;lines_below&quot;: 1 }, &quot;MD024&quot;: { &quot;siblings_only&quot;: true }, &quot;no-bare-urls&quot;: false, &quot;no-inline-html&quot;: { &quot;allowed_elements&quot;: [ &quot;mermaid&quot;, &quot;a&quot;, &quot;img&quot; ] } } } Settings | Extensions |Markdown All in One | Ordered List | Auto Renumber: false {\"markdown.extension.orderedList.autoRenumber\": false} Settings | Extensions |Markdown All in One | Ordered List | Marker: one {\"markdown.extension.orderedList.marker\": \"one\"} 18.3 Optional Installation The installation order does not matter. 18.3.1 Git Git command-line utility enables some advanced operations that the GitHub client doesnt support. Use the default installation options, except these preferences of ours: 1. Nano is the default text editor. 18.3.2 LibreOffice Calc LibreOffice Calc is an alternative to Excel. Unlike it Excel, it doesnt guess much with formatting (which usually mess up things, especially dates). 18.3.3 pandoc pandoc converts files from one markup format into another. {added Sept 2012} 18.3.4 Python Python is used by some analysts. 
The prototypical installation involves two options. Anaconda, which include Jupyter Notebooks, Jupyter Lab, and Spyder. Plus two programs that are already on this list: RStudio and VS Code. In Windows, open Anaconda Prompt with administrative privileges conda install numpy pandas scikit-learn matplotlib Standard Python, while installing packages through pip3 in the terminal. If the pip3 command is unrecognized because its missing from the OS path variable, an alternative is py -3 -mpip install pysftp; this calls pip through the py command which is sometimes in the path variable after installation. 18.4 Asset Locations GitHub repository https://github.com/OuhscBbmc/RedcapExamplesAndPatterns {added Sept 2012} File server directory Ask your PI. For Peds, its typically on the S drive. SQL Server Database Ask Thomas, Will or David REDCap database Ask Thomas, Will or David. It is a http url, and were trying not to publicize its value. ODBC UserDsn The name depends on your specific repository, and SQL Server database. Ask Thomas, Will or David for how to set it up. 18.5 Administrator Installation These programs are useful to people administrating servers, but not to the typical data scientist. 18.5.1 MySQL Workbench MySQL Workbench is useful occasionally for REDCap admins. 18.5.2 Postman Postman Native App is useful for developing with the API and has replaced the Chrome app. If thats not possible, a web client is available as well. With either program, do not access any PHI. 18.5.3 SQL Server Management Studio (SSMS) SQL Server Management Studio has been replaced by Azure Data Studio for some roles, but is still recommended for database administrators. It is an easy way to access the database and write queries (and transfer the SQL to an R file). Its not required for the REDCap API, but its usually necessary when integrating REDCap with other databases. Note: here are some non-default changes that facilitate our workflow. 
The first two help when we save the database structure (not data) on GitHub, so we can easily track/monitor the structural changes over time. The tab options keep things consistent between editors. In the SSMS Tools | Options dialog box: SQL Server Object Explorer | Scripting | Include descriptive headers: False SQL Server Object Explorer | Scripting | Script extended properties: False Text Editor | All Languages | Tabs | Tab size: 2 Text Editor | All Languages | Tabs | Indent size: 2 Text Editor | All Languages | Tabs | Insert Spaces: true These dont affect the saved files, but make life easier. The first makes the result font bigger. Environment | Fonts and Colors | Show settings for: Grid Results | Size: 10 Query Results | SQL Server | Results to Grid | Include column headers when copying or saving the results: false Designers | Table and Database Designers | Prevent saving changes that require table-recreation: false Text Editor | Editor Tab and Status Bar | Tab Text | Include Server Name: false Text Editor | Editor Tab and Status Bar | Tab Text | Include Database Name: false Text Editor | Editor Tab and Status Bar | Tab Text | Include Login Name: false Text Editor | All Languages | General | Line Numbers: true A dark theme is unofficially supported in SSMS 18. If you have write privileges in the Program Files directory, a quick modification to a config file will reduce eye strain. This change also prevents your screen from flashing dark-to-light-to-dark, which broadcasts your wandering attention during a Zoom meeting. For more details, see setting-up-dev-machine.md (in a private repo thats restricted to BBMC members). 18.5.4 WinSCP WinSCP is a GUI for SCP and SFTP file transfer using SSH keys. The tool is occasionally useful for admins when collaborating with other institutions or other OU computing resources. Because PHI can accidentally be sent to collaborators without a DUA, we recommend that WinSCP be installed only by informed administrators. 
The typical data scientist on our teams does not need this tool. 18.6 Installation Troubleshooting Git: Will Beasley resorted to this workaround Sept 2012: http://stackoverflow.com/questions/3431361/git-for-windows-the-program-cant-start-because-libiconv2-dll-is-missing. And then he copied the following four files from D:/Program Files/msysgit/mingw/bin/ to D:/Program Files/msysgit/bin/: (1) libiconv2.dll, (2) libcurl-4.dll, (3) libcrypto.dll, and (4) libssl.dll. (If you install to the default location, you'll move instead from C:/msysgit/mingw/bin/ to C:/msysgit/bin/) {added Sept 2012} Git: On a different computer, Will Beasley couldn't get RStudio to recognize msysGit, so he installed the Full installer for official Git for Windows 1.7.11 from (http://code.google.com/p/msysgit/downloads/list) and switched the Git Path in the RStudio Options. {added Sept 2012} RStudio If something goes wrong with RStudio, re-installing might not fix the issue, because your personal preferences aren't erased. To be safe, you can be thorough and delete the equivalent of C:\\Users\\wibeasley\\AppData\\Local\\RStudio-Desktop\\. The options settings are stored (and can be manipulated) in this extensionless text file: C:\\Users\\wibeasley\\AppData\\Local\\RStudio-Desktop\\monitored\\user-settings\\user-settings. {added Sept 2012} 18.7 Ubuntu Installation Ubuntu desktop 19.04 follows these instructions for the R and RStudio and required these debian packages to be installed before the R packages. The --yes option avoids manual confirmation for each line, so you can copy &amp; paste this into the terminal. Add the following to the sources with sudo nano /etc/apt/sources.list. The eoan version may be updated; the metrocast part could be modified too from this list. I found it worked better for a new Ubuntu release than cloud.r-project.org. 
# For R 4.0 deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ deb http://mirror.genesisadaptive.com/ubuntu/ focal-backports main restricted universe # For R 3.5 &amp; #.6 deb https://cloud.r-project/bin/linux/ubuntu/ eoan-cran35/ deb-src https://cloud.r-project/bin/linux/ubuntu/ eoan-cran35/ deb http://mirror.metrocast.net/ubuntu/ eoan-backports main restricted universe This next block can be copied and pasted (ctrl-shift-v) into the console entirely. Or lines can be pasted individual (without the ( function install-packages { line, or the last three lines). ( function install-packages { ### Add the key, update the list, then install base R. sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 sudo apt-get update sudo apt-get install r-base r-base-dev ### Git sudo apt-get install git-core git config --global user.email &quot;wibeasley@hotmail.com&quot; git config --global user.name &quot;Will Beasley&quot; git config --global credential.helper &#39;cache --timeout=3600000&#39; ### Ubuntu &amp; Bioconductor packages that are indirectly needed for packages and BBMC scripts # Supports the `locate` command in bash sudo apt-get install mlocate # The genefilter package is needed for &#39;modeest&#39; on CRAN. 
# No longer a modeest dependency: Rscript -e &#39;BiocManager::install(&quot;genefilter&quot;)&#39; ### CRAN packages that are also on the Ubuntu repositories # The &#39;xml2&#39; package; https://CRAN.R-project.org/package=xml2 sudo apt-get --yes install libxml2-dev r-cran-xml # The &#39;curl&#39; package, and others; https://CRAN.R-project.org/package=curl sudo apt-get --yes install libssl-dev libcurl4-openssl-dev # The &#39;udunits2&#39; package: https://cran.r-project.org/web/packages/udunits2/index.html sudo apt-get --yes install libudunits2-dev # The &#39;odbc&#39; package: https://github.com/r-dbi/odbc#linux---debian--ubuntu sudo apt-get --yes install unixodbc-dev tdsodbc odbc-postgresql libsqliteodbc # The &#39;rgl&#39; package; https://stackoverflow.com/a/39952771/1082435 sudo apt-get --yes install libcgal-dev libglu1-mesa-dev # The &#39;magick&#39; package; https://docs.ropensci.org/magick/articles/intro.html#build-from-source sudo apt-get --yes install &#39;libmagick++-dev&#39; # To compress vignettes when building a package; https://kalimu.github.io/post/checklist-for-r-package-submission-to-cran/ sudo apt-get --yes install qpdf # The &#39;pdftools&#39; and &#39;Rpoppler&#39; packages, which involve PDFs sudo apt-get --yes install libpoppler-cpp-dev libpoppler-glib-dev # The &#39;sys&#39; package sudo apt-get --yes install libapparmor-dev # The &#39;sf&#39; and other spatial packages: https://github.com/r-spatial/sf#ubuntu; https://github.com/r-spatial/sf/pull/1208 sudo apt-get --yes install libudunits2-dev libgdal-dev libgeos-dev libproj-dev libgeos++-dev # For Cairo package, a dependency of Shiny &amp; plotly; https://gykovacsblog.wordpress.com/2017/05/15/installing-cairo-for-r-on-ubuntu-17-04/ sudo apt-get --yes install libcairo2-dev # &#39;rJava&#39; and others; https://www.r-bloggers.com/installing-rjava-on-ubuntu/ sudo apt-get --yes install default-jre default-jdk sudo R CMD javareconf sudo apt-get --yes install r-cran-rjava # For reprex and 
sometimes ssh keys; https://github.com/tidyverse/reprex#installation sudo apt-get --yes install xclip # gifski -apparently the rust compiler is necessary sudo apt-get --yes install cargo # For databases sudo apt-get --yes install sqlite sqliteman sudo apt-get --yes install postgresql postgresql-contrib pgadmin3 # pandoc sudo apt-get --yes install pandoc # For checking packages. Avoid `/usr/bin/texi2dvi: not found` warning. sudo apt-get install texinfo } install-packages ) The version of pandoc from the Ubuntu repository may be delayed. To install the latest version, download the .deb file then install from the same directory. Finally, verify the version. sudo dpkg -i pandoc-* pandoc -v The Postman native app for Ubuntu is installed through snap, which is updated daily automatically. snap install postman 18.8 Retired Tools We previously installed the software below. Most have been replaced by software above thats either newer or more natural to use. GitLab SSL Certificate isnt software, but still needs to be configured. Talk to Will for the server URL and the *.cer file. Save the file in something like ~/keys/ca-bundle-gitlab.cer Associate the file with git config --global http.sslCAInfo ...path.../ca-bundle-gitlab.cer (but replace ...path...). MiKTeX is necessary only if youre using knitr or Sweave to produce LaTeX files (and not just markdown files). Its a huge, slow installation that can take an hour or two. {added Sept 2012} Pulse Secure is VPN client for OUHSC researchers. Its not required for the REDCap API, but its usually necessary to communicate with other campus data sources. msysGit allows RStudio to track changes and commit &amp; sync them to the GitHub server. Connect RStudio to GitHub repository. I moved this to optional (Oct 14, 2012) because the GitHub client (see above) does almost everything that the RStudio plugin does; and it does it a little better and a little more robust; and its installation hasnt given me problems. 
{added Oct 2012} Starting in the top right of RStudio, click: Project -&gt; New Project -&gt; Create Project from Version Control -&gt; Git {added Sept 2012} An example of a repository URL is https://github.com/OuhscBbmc/RedcapExamplesAndPatterns. Specify a location to save (a copy of) the project on your local computer. {added Sept 2012} CSVed is a lightweight program for viewing data files. It fits somewhere between a text editor and Excel. SourceTree is a rich client that has many more features than the GitHub client. I don't recommend it for beginners, since it has more ways to mess up things. But for developers, it nicely fills a spot in between the GitHub client and command-line operations. The branching visualization is really nice too. Unfortunately and ironically, it doesn't currently support Linux. {added Sept 2014}. git-cola is probably the best GUI for Git supported on Linux. It's available through the official Ubuntu repositories with apt-get (also see this). The branch visualization features are in a different, but related program, git dag. {added Sept 2014} GitHub for Eclipse is something I discourage for a beginner, and I strongly recommend you start with RStudio (and GitHub Client or the git capabilities within RStudio) for a few months before you even consider Eclipse. It's included in this list for the sake of completeness. When installing EGit plug-in, ignore eclipse site and check out this youtube video: http://www.youtube.com/watch?v=I7fbCE5nWPU. Color Oracle simulates the three most common types of color blindness. If you produce a color graph in a report you develop, check it with Color Oracle (or ask someone else to). If it's already installed, it takes less than 10 seconds to check it against all three types of color blindness. If it's not installed, extra work may be necessary if Java isn't already installed. When you download the zip, extract the ColorOracle.exe program where you like. 
{added Sept 2012} Atom is a text editor, similar to Notepad++. Notepad++ appears more efficient opening large CSVs. Atom is better suited when editing a lot of files in a repository. For finding and replacing across a lot of files, it is superior to Notepad++ and RStudio; it permits regexes and has a great GUI preview of the potential replacements. Productivity is enhanced with the following Atom packages: Sublime Style Column Selection: Enable Sublime style Column Selection. Just hold alt while you select, or select using your middle mouse button. atom-language-r allows Atom to recognize files as R. This prevents spell checking indicators and enable syntax highlighting. When you need to browse through a lot of scattered R files quickly, Atoms tree panel (on the left) works well. An older alternative is language-r. language-csv: Adds syntax highlighting to CSV files. The highlighting is nice, and it automatically disables spell checking lines. atom-beautify: Beautify HTML, CSS, JavaScript, PHP, Python, Ruby, Java, C, C++, C#, Objective-C, CoffeeScript, TypeScript, Coldfusion, SQL, and more in Atom. atom-wrap-in-tag: wraps tag around selection; just select a word or phrase and hit Alt + Shift + w. minimap: A preview of the full source code (in the right margin). script: Run scripts based on file name, a selection of code, or by line number. git-plus: Do git things without the terminal (I dont think this is necessary anymore). The packages can be installed through Atom, or through the apm utility in the command line: apm install sublime-style-column-selection atom-language-r language-csv atom-beautify atom-wrap-in-tag minimap script And the following settings keep files consistent among developers. 
File | Settings | Editor | Tab Length: 2 (As opposed to 3 or 4, used in other conventions) File | Settings | Editor | Tab Type: soft (This inserts 2 spaces instead of a tab when Tab is pressed) As an alternative to the Gist, run the local R script install-packages.R (located in the utility/ directory) that lives in this repository. The workhorse of this function is OuhscMunge::package_janitor(). "],["tools.html", "Chapter 19 Considerations when Selecting Tools 19.1 General 19.2 Languages 19.3 R Packages 19.4 Database 19.5 Additional Resources", " Chapter 19 Considerations when Selecting Tools 19.1 General 19.1.1 The Components Goal While discussing the advantages and disadvantages of tools, a colleague once said, Tidyverse packages dont do anything that I cant already do in Base R, and sometimes it even requires more lines of code. Regardless if I agree, I feel these two points are irrelevant. Sometimes the advantage of a tool isnt to expand existing capabilities, but rather to facilitate development and maintenance for the same capability. Likewise, I care less about the line count, and more about the readability. Id prefer to maintain a 20-line chunk that is familiar and readable than a 10-line chunk with dense phrases and unfamiliar functions. The bottleneck for most of our projects is human time, not execution time. 19.1.2 Current Skillset of Team 19.1.3 Desired Future Skillset of Team 19.1.4 Skillset of Audience 19.2 Languages 19.3 R Packages When developing a codebase used by many people, choose packages both on their functionality, as well as their ease of installation and maintainability. For example, the rJava package is a powerful package that allows R package developers to leverage the widespread Java framework and many popular Java packages. However, installing Java and setting the appropriate path or registry settings can be error-prone, especially for non-developers. 
Therefore when considering between two functions with comparable capabilities (e.g., xlsx::read.xlsx() and readxl::read_excel()), avoid the package that requires a proper installation and configuration of Java and rJava. If the more intensive choice is required (say, you need a capability in xlsx missing from readxl), take: 20 minutes to start a markdown file that enumerates the package's direct and indirect dependencies that require manual configuration (e.g., rJava and Java), where to download them, and the typical installation steps. 5 minutes to create a GitHub Issue that (a) announces the new requirement, (b) describes who/what needs to install the requirement, (c) points to the markdown documentation, and (d) encourages teammates to post their problems, recommendations, and solutions in this issue. We've found that a dedicated Issue helps communicate that the package dependency necessitates some intention and encourages people to assist other people's troubleshooting. When something potentially useful is posted in the Issue, move it to the markdown document. Make sure the document and the issue hyperlink to each other. 15 minutes every year to re-evaluate the landscape. Confirm that the package is still actively maintained, and that no newer (and easily-maintained) package offers the desired capability.8 If a better fit now exists, evaluate if the effort to transition to the new package is worth the benefit. Be more willing to transition if the project is relatively green, and more development is upcoming. Be more willing to transition if the transition is relatively in-place, and will not require much modification of code or training of people. Finally, consider how much traffic passes through the dependency. A brittle dependency will not be too disruptive if isolated in a downstream analysis file run by only one statistician. On the other hand, be very protective in the middle of the pipeline where typically most of your team runs. 
19.4 Database Ease of installation &amp; maintenance Support from IT which database engine are they most comfortable supporting. Integration with LDAP, Active Directory, or Shibboleth. Warehouse vs transactional performance 19.5 Additional Resources (Colin Gillespie 2017), particularly the Package selection section. H References "],["team.html", "Chapter 20 Growing a Team 20.1 Recruiting 20.2 Training to Data Science 20.3 Bridges Outside the Team", " Chapter 20 Growing a Team 20.1 Recruiting 20.2 Training to Data Science Starting with a Researcher Starting with a Statistician Starting with a DBA Starting with a Software Developer 20.3 Bridges Outside the Team Monthly User Groups Annual Conferences "],["redcap-user.html", "Chapter 21 Material for REDCap Users 21.1 Login 21.2 Developing Reports", " Chapter 21 Material for REDCap Users 21.1 Login 21.2 Developing Reports Please first read Login "],["redcap-developer.html", "Chapter 22 Material for REDCap Developers", " Chapter 22 Material for REDCap Developers "],["redcap-admin.html", "Chapter 23 Material for REDCap Admins", " Chapter 23 Material for REDCap Admins "],["git.html", "A Git &amp; GitHub A.1 for Code Development A.2 for Collaboration A.3 for Stability A.4 for New Collaborators A.5 Steps for Contributing to Repo", " A Git &amp; GitHub A.1 for Code Development Jenny Bryan and Jim Hester have published a thorough description of using Git from a data scientists perspective (Happy Git and GitHub for the useR), and we recommend following their guidance. It is consistent with our approach, with a few exceptions noted below. A complementary resource is Team Geek, which has insightful advice for the human and collaborative aspects of version control. Other Resources Setting up a CI/CD Process on GitHub with Travis CI. Travis-CI blob from August 2019. A.2 for Collaboration Somewhat separate from its version control capabilities, GitHub provides built-in tools for coordinating projects across people and time. 
These tools revolve around GitHub Issues, which allow teammates to track issues assigned to them and others, and to search whether other teammates have encountered similar problems to the ones they're facing now (e.g., the new computer can't install the rJava package). There's nothing magical about GitHub issues, but if you don't use them, consider using a similar or more capable tool like those offered by Atlassian, Asana, Basecamp, and many others. Here are some tips from our experiences with projects involving between 2 and 10 statisticians working with an upcoming deadline. If you create an issue that describes a problem blocking your progress, include both the raw text (e.g., error: JAVA_HOME cannot be determined from the Registry) and possibly a screenshot. The text allows the problem to be more easily searched by people later; the screenshot usually provides extra context that allows others to understand the situation and help more quickly. Include enough broad context and enough specific details that teammates can quickly understand the problem. Ideally they can even run your code and debug it. Good recommendations can be found in the Stack Overflow posts, How to make a great R reproducible example and How do I ask a good question?. The issues don't need to be as thorough, because your teammates start with more context than a Stack Overflow reader. We typically include a description of the problem or fishy behavior. the exact error message (or a good description of the fishy behavior). a snippet of the 1-10 lines of code suspected of causing the problem. a link to the code's file (and ideally the line number, such as https://github.com/OuhscBbmc/REDCapR/blob/master/R/redcap-version.R#L40) so the reader can hop over to the entire file. references to similar GitHub Issues or Stack Overflow questions that could aid troubleshooting. A.3 for Stability Review Git commits closely No unintended functional difference (e.g., !match accidentally changed to match). 
No PHI snuck in (e.g., a patient ID used while isolating and debugging). The metadata format didn't change (e.g., Excel sometimes changes the string 010 to the number 10). See the appendix for a longer discussion about the problems that Excel typically introduces. A.4 for New Collaborators A.5 Steps for Contributing to Repo A.5.1 Regular Contributions A.5.1.1 Keep your dev branch fresh We recommend doing this at least every day you write code in a repo. Perhaps more frequently if a lot of developers are pushing code (e.g., right before a reporting deadline). Update master on your local machine (from the GitHub server) Merge master into your local dev branch Push your local dev branch to the GitHub server A.5.1.2 Make your code contributions available to other analysts At least every few days, push your changes to the master branch so teammates can benefit from your work. Especially if you are improving the pipeline code (e.g. Ellises or REDCap Arches). Make sure your dev branch is updated immediately before you create a Pull Request. Follow the steps above. Verify the merged code still works as expected. In other words, make sure that when your new code is blended with the newest master code, nothing breaks. Depending on the repo, these steps might include Build and Check the repo (assuming the repo is also a package). Run any code that verifies the basic functionality of the repo. (For example, our MIECHV team should run high-school-funnel.R and verify the assertions passed). Commit changes in your dev branch and push to the GitHub server. Create a Pull Request (otherwise known as a PR) and assign a reviewer. (For example, developers in the MIECHV team are paired together to review each other's code.) The reviewer will pull your dev branch on to their local machine and run the same checks and verification (that you did on the 2nd step above). This duplicate effort helps verify that your code likely works for everyone on their own machines. 
The reviewer then accepts the PR and the master branch now contains your changes and are available to teammates. {Transfer &amp; update the material from https://github.com/OuhscBbmc/BbmcResources/blob/master/instructions/github.md} "],["snippets.html", "B Snippets B.1 Reading External Data B.2 Grooming B.3 Identification B.4 Correspondence with Collaborators", " B Snippets B.1 Reading External Data B.1.1 Reading from Excel Background: Avoid Excel for the reasons previously discussed. But if there isnt another good option, be protective. readxl::read_excel() allows you to specify column types, but not column order. The names of col_types is ignored by readxl::read_excel(). To defend against roaming columns (e.g., the files changed over time), tesit::assert() that the order is what you expect. Last Modified: 2019-12-12 by Will # ---- declare-globals --------------------------------------------------------- config &lt;- config::get() # cat(sprintf(&#39; `%s` = &quot;text&quot;,\\n&#39;, colnames(ds)), sep=&quot;&quot;) # &#39;text&#39; by default --then change where appropriate. col_types &lt;- c( `Med Rec Num` = &quot;text&quot;, `Admit Date` = &quot;date&quot;, `Tot Cash Pymt` = &quot;numeric&quot; ) # ---- load-data --------------------------------------------------------------- ds &lt;- readxl::read_excel( path = config$path_admission_charge, col_types = col_types # sheet = &quot;dont-use-sheets-if-possible&quot; ) testit::assert( &quot;The order of column names must match the expected list.&quot;, names(col_types) == colnames(ds) ) B.1.2 Removing Trailing Comma from Header Background: Occasionally a Meditech Extract will have an extra comma at the end of the 1st line. For each subsequent line, readr:read_csv() appropriately throws a new warning that it is missing a column. This warning flood can mask real problems. 
Explanation: This snippet (a) reads the csv as plain text, (b) removes the final comma, and (c) passes the plain text to readr::read_csv() to convert it into a data.frame. Instruction: Modify Dx50 Name to the name of the final (real) column. Real Example: truong-pharmacist-transition-1 (Accessible to only CDW members.) Last Modified: 2019-12-12 by Will # The next two lines remove the trailing comma at the end of the 1st line. raw_text &lt;- readr::read_file(path_in) raw_text &lt;- sub(&quot;^(.+Dx50 Name),&quot;, &quot;\\\\1&quot;, raw_text) ds &lt;- readr::read_csv(raw_text, col_types=col_types) B.1.3 Removing Trailing Comma from Header Background: When incoming data files are too large to comfortably read with readr, we use vroom. The two packages are developed by the same group and might be combined in the future. Explanation: This snippet defines the col_types list with names to mimic our approach of using readr. There are some small differences with our readr approach: 1. col_types is a list instead of a readr::cols_only object. 2. The call to vroom::vroom() passes col_names = names(col_types) explicitly. 3. If the data file contains columns we don't need, we define them in col_types anyway; vroom needs to know the file structure if it's missing a header row. Real Example: akande-medically-complex-1 (Accessible to only CDW members.) These files did not have a header of variable names; the first line of the file is the first data row. 
Last Modified: 2020-08-21 by Will # ---- declare-globals --------------------------------------------------------- config &lt;- config::get() col_types &lt;- list( sak = vroom::col_integer(), # &quot;system-assigned key&quot; aid_category_id = vroom::col_character(), age = vroom::col_integer(), service_date_first = vroom::col_date(&quot;%m/%d/%Y&quot;), service_date_lasst = vroom::col_date(&quot;%m/%d/%Y&quot;), claim_type = vroom::col_character(), provider_id = vroom::col_character(), provider_lat = vroom::col_double(), provider_long = vroom::col_double(), provider_zip = vroom::col_character(), cpt = vroom::col_integer(), revenue_code = vroom::col_integer(), icd_code = vroom::col_character(), icd_sequence = vroom::col_integer(), vocabulary_coarse_id = vroom::col_integer() ) # ---- load-data --------------------------------------------------------------- ds &lt;- vroom::vroom( file = config$path_ohca_patient, delim = &quot;\\t&quot;, col_names = names(col_types), col_types = col_types ) rm(col_types) B.2 Grooming B.2.1 Correct for misinterpreted two-digit year Background: Sometimes the Meditech dates are specified like 1/6/54 instead of 1/6/1954. readr::read_csv() has to choose if the year is supposed to be 1954 or 2054. A human can use context to guess a birth date is in the past (so it guesses 1954), but readr cant (so it guesses 2054). For avoid this and other problems, request dates in an ISO-8601 format. Explanation: Correct for this in a dplyr::mutate() clause; compare the date value against today. If the date is today or before, use it; if the day is in the future, subtract 100 years. Instruction: For future dates such as loan payments, the direction will flip. 
Last Modified: 2019-12-12 by Will ds %&gt;% dplyr::mutate( dob = dplyr::if_else(dob &lt;= Sys.Date(), dob, dob - lubridate::years(100)) ) B.3 Identification B.3.1 Generating tags Background: When you need to generate unique identification values for future people/clients/patients, as described in the style guide. Explanation: This snippet will create a 5-row csv with random 7-character tags to send to the research team collecting patients. The Instruction: Set pt_count, tag_length, path_out, and execute. Add and rename the columns to be more appropriate for your domain (e.g., change patient tag to store tag). Last Modified: 2019-12-30 by Will pt_count &lt;- 5L # The number of rows in the dataset. tag_length &lt;- 7L # The number of characters in each tag. path_out &lt;- &quot;data-private/derived/pt-pool.csv&quot; draw_tag &lt;- function (tag_length = 4L, urn = c(0:9, letters)) { paste(sample(urn, size = tag_length, replace = T), collapse = &quot;&quot;) } ds_pt_pool &lt;- tibble::tibble( pt_index = seq_len(pt_count), pt_tag = vapply(rep(tag_length, pt_count), draw_tag, character(1)), assigned = FALSE, name_last = &quot;--&quot;, name_first = &quot;--&quot; ) readr::write_csv(ds_pt_pool, path_out) The resulting dataset will look like this, but with different randomly-generated tags. # A tibble: 5 x 5 pt_index pt_tag assigned name_last name_first &lt;int&gt; &lt;chr&gt; &lt;lgl&gt; &lt;chr&gt; &lt;chr&gt; 1 1 seikyfr FALSE -- -- 2 2 voiix4l FALSE -- -- 3 3 wosn4w2 FALSE -- -- 4 4 jl0dg84 FALSE -- -- 5 5 r5ei5ph FALSE -- -- B.4 Correspondence with Collaborators B.4.1 Excel files Receiving and storing Excel files should almost always be avoided for the reasons explained in this letter. We receive extracts as Excel files frequently, and have the following request ready to email the person sending us Excel files. Adapt the bold values like 109.19 to your situation. If you are familiar with their tools, suggest an alternative for saving the file as a csv. 
Once presented with these Excel gotchas, almost everyone has an aha moment and recognizes the problem. Unfortunately, not everyone has flexible software and can adapt easily. [Start of the letter] Sorry to be tedious, but could you please resend the extract as a csv file? Please call me if you have questions. Excel is being too helpful with some of the values, and essentially corrupting them. For example, a value like 109.19 is interpreted as a number, not a character code (e.g., see cell L14). Because of limitations of finite precision, this becomes 109.18999999999999773. We can't round it, because there are other values in this column that cannot be cast to numbers, such as V55.0. Furthermore, the E's in some codes are incorrectly interpreted as the exponent operator (e.g., 4E5 is converted to 400,000). Finally, values like 41.0 are being converted to a number and the trailing zero is dropped (so cells like 41 are not distinguishable from 41.0). Unfortunately the problems exist in the Excel file itself. When we import the columns as text, the values are already in their corrupted state. Please compress/zip the csv if the file is too large to email. We've found that an Excel file is typically 5-10 times larger than a compressed csv. As much as Excel interferes with our medical variables, we're lucky. It has messed with other branches of science much worse. Genomics were using it far too late before they realized their mistakes. What happened? By default, Excel and other popular spreadsheet applications convert some gene symbols to dates and numbers. For example, instead of writing out Membrane-Associated Ring Finger (C3HC4) 1, E3 Ubiquitin Protein Ligase, researchers have dubbed the gene MARCH1. Excel converts this into a date—03/01/2016, say—because that's probably what the majority of spreadsheet users mean when they type it into a cell. Similarly, gene identifiers like 2310009E13 are converted to exponential numbers (2.31E+19). 
In both cases, the conversions strip out valuable information about the genes in question. [End of the letter] "],["presentations.html", "C Presentations C.1 CDW C.2 REDCap C.3 Reproducible Research &amp; Visualization C.4 Data Management C.5 GitHub C.6 Software C.7 Architectures C.8 Components", " C Presentations Here is a collection of presentations by the BBMC and friends that may help demonstrate concepts discussed in the previous chapters. C.1 CDW prairie-outpost-public: Documentation and starter files for OUHSCs Clinical Data Warehouse. OUHSC CDW C.2 REDCap REDCap Systems Integration. REDCap Con 2015, Portland, Oregon. Literate Programming Patterns and Practices with REDCap REDCap Con 2014, Park City, Utah. Interacting with the REDCap API using the REDCapR Package REDCap Con 2014, Park City, Utah. Optimizing Study Management using REDCap, R, and other software tools. SCUG 2013. C.3 Reproducible Research &amp; Visualization Building pipelines and dashboards for practitioners: Mobilizing knowledge with reproducible reporting. Displaying Health Data Colloquium 2018, University of Victoria. Interactive reports and webpages with R &amp; Shiny. SCUG 2015. Big data, big analysis: a collaborative framework for multistudy replication. Conventional of Canadian Psychological Association, Victoria BC, 2016. WATS: wrap-around time series: Code to accompany WATS Plot article, 2014. C.4 Data Management BBMC Validator: catch and communicate data errors. SCUG 2016. Text manipulation with Regular Expressions, Part 1 and Part 2. SCUG 2016. Time and Effort Data Synthesis. SCUG 2015. C.5 GitHub Scientific Collaboration with GitHub. OU Bioinformatics Breakfast Club 2015. C.6 Software REDCapR: Interaction Between R and REDCap. OuhscMunge: Data manipulation operations commonly used by the Biomedical and Behavioral Methodology Core within the Department of Pediatrics of the University of Oklahoma Health Sciences Center. codified: Produce standard/formalized demographics tables. 
usnavy billets: Optimally assigning naval officers to billets. C.7 Architectures Linear Pipeline of the R Analysis Skeleton . Many-to-many Pipeline of the R Analysis Skeleton . Immunization transfer . IALSA: A Collaborative Modeling Framework for Multi-study Replication . POPS: Automated daily screening eligibility for rare and understudied prescriptions. . C.8 Components Customizing display tables: using css with DT and kableExtra. SCUG 2018. yaml and expandable trees that selectively show subsets of hierarchy, 2017. "],["scratch-pad.html", "D Scratch Pad of Loose Ideas D.1 Chapters &amp; Sections to Form D.2 Practices D.3 Good Sites", " D Scratch Pad of Loose Ideas D.1 Chapters &amp; Sections to Form Tools to Consider tidyverse odbc ggplot2 use factors for explanatory variables when you want to keep the order consistent across graphs. (genevamarshall) automation on a remote server or VDI Theres always a chance that my machine is configured a little differently than yours, which may affect results. Will you glance at those results too? I forgot what this project is about, and I wouldnt be able to spot problems like you can. The S drive file and the tables dont seem to have any obvious problems public reports (and dashboards) when developing a report for a external audience (ie, people outside your immediate research team), choose one or two pals who are unfamiliar with your aims/methods as an impromptu focus group. Ask them what things need to be redesigned/reframed/reformated/further-explained. (genevamarshall) plots plot labels/axes variable names units of measurement (eg, proportion vs percentage on the y axis) documentation - bookdown Bookdown has worked well for us so far. Its basically independent markdown documents stored on a dedicated git repo. Then you click build in RStudio and it converts all the markdown files to static html files. 
Because GitHub is essentially serving as the backend, everyone can make changes to sections and we dont have to be too worried about Heres a version thats hosted publicly, but I tested that it can be hosted on our shared file server. (Its possible because the html files are so static.) If this is what you guys want for OUs collective CDW, please tell me: who you want to be able to edit the documents without review. Ill add them to the GitHub repo. who you want to be able to view the documents. Ill add them to a dedicate file server space. https://ouhscbbmc.github.io/data-science-practices-1/workstation.html#installation-required I was thinking that each individual database gets it own chapter. The BBMC has ~4 databases in this sense: a Centricity staging database, a GECB staging database, the central warehouse, and the (fledgling) downstream OMOP database. Then there are ~3 sections within each chapter: (a) a black-and-white description of the tables, columns, &amp; indexes (written mostly for consumers), (b) recommendations how to use each table (written mostly for consumers), and (c) a description of the ETL process (written mostly for developers &amp; admins). My proposal uses GitHub and Markdown because theyre so universal (no knowledge of R is required really you could write it with any text editor &amp; commit, and let someone else click build in RStudio on their machine). But Im very flexible on all this. Ill support &amp; contribute to any system that you guys feel will work well across the teams. developing packages R packages by Hadley Wickham http://mangothecat.github.io/goodpractice/ Cargo cult programming is a style of computer programming characterized by the ritual inclusion of code or program structures that serve no real purpose. (Wikipedia) Your team should decide which elements of a file prototype and repo prototype are best for you. D.2 Practices on.exit() should have add = TRUE (Wickham (2019), Exit handlers). 
D.3 Good Sites Posts on these sites are almost always worth your time reading. The frequently improve how you develop with the common components used in our data pipelines. Yihui Xie, created knitr and other important contributions to reproducible research. RStudio, in addition to their IDE, many of the packages used here were created by their developers. Explain xkcd because its good. Occasionally skim the titles on these sites and pick a few relevant to your interests. We think it helps keep you aware of developments in the field, so your skills continually grow and our approaches dont become stagnant. OReillys Data science ideas and resources Towards Data Science These books havent been referenced (yet), but have good guidance and could be worth your time skimming to see what is relevant. The Tidynomicon by Dhavide Aruliah &amp; Greg Wilson Efficient R programming by Colin Gillespie &amp; Robin Lovelace Mastering Software Development in R H References "],["example-dashboard.html", "E Example Dashboard E.1 Example E.2 Style Guide E.3 Architecture", " E Example Dashboard Communicating quantitative trends to a community with a quantitative phobia can be difficult. This appendix showcases a dashboard style that has evolved during the past few years of OSDH Home Visiting, where twelve local programs practitioners implemented their own intervention ideas tailored to their interests and community. Over 50 dashboards have been developed: a custom dashboard is developed for each programs cycle, and a three additional dashboards communicate the results of program-agnostic investigations. A style guide is an important tool when managing this many unique investigations For a program-specific dashboard, its more important to meet the needs of the individual PDSA than to conform to a guide. However, we aim to make the dashboards as consistent as possible for several reasons: Its less work for the practitioners. 
A familiar presentation will help the practitioners grow comfortable with their new cycles dashboard. Recall most will use at least five dashboards in only a few years. Its less work for the analysts/developers. Within a cycle, a consistent format (with relatively interchangeable features) means that one analyst can more easily contribute and trouble shoot a colleagues dashboard. The lessons weve learned (and mistakes weve made) can be applied to later dashboards. The quality should improve and the development should quicken. Just like our CQI grant encourages an HV program to learn from its history and to learn from others, we as analysts should too. As we work with the programs to design a PDSA, each one analyst will learn about the strengths and weaknesses of our current dashboard style, and propose improvements. E.1 Example A example dashboard that mimic the real CQI is available at https://ouhscbbmc.github.io/data-science-practices-1/dashboard-1.html. The dashboard source code is available in the analysis/dashboard-1 directory of the R Analysis Skeleton repository; this repo contains the code and documents the entire pipeline leading up to this dashboard. Weve had success developing and distributing dashboards as self-contained html files. They are portable and dont have dependencies on local data files or remote databases, yet the JavaScript and CSS provide a modest amount of interactivity. The dashboards principal components are flexdashboard, plotly, ggplot2, and R Markdown. In this dashboard of synthetic data, a cognitive measure is tracked across 14 years in three home visiting counties. E.2 Style Guide This section describes a set of practices that the BBMC analysts have decided are best for the CQI dashboards used in our MIECHV evaluations. In a sense, this CQI dashboard guide supplements our overall style guide. The MIECHV CQI dashboards are based on RStudios flexdashboard package, which uses rmarkdown, JavaScript, and CSS. 
flexdashboard has a great website that should be read by anyone adapting this guide for their own CQI projects. E.2.1 Headline page The dashboards greeting should be a good blend of (a) orientating the user to the context and (b) being welcoming but not overwhelming. For the second PDSA cycle, try to have only one or two important and impactful graphs on the first page; specialized graphs have their own pages later. Left column: Text qualified with {.tabset} Notes tab: text that provides info about the dashboards dataset, such as Count of (a) models, (b) programs, (c) clients, and (d) observations Date range The specific program_codes. Even though a PDSA is focused on a specific program, ideally other programs are included so they have a feel for what others are doing. Right column: Headline Graph(s) optionally qualified with {.tabset}. Ideally starts with an overall graph, with no longitudinal component. Show data only from the program, not the overall model. E.2.2 Tables page The tables provide exactness, especially the exactness of (a) the actual y value and (b) the frequency of the longitudinal values. These tables make it easier to see if youre inadvertently plotting multiple values for the same month, or if some month is missing. In the future, we can add a Download as CSV button if anyone requests it. Another advantage of the tables is that all measures are visible in the same screen. A typical program-month table will have at least 6 columns: program_code, month, model, outcome measure, process measure, and disruptor measure. If this is difficult to do, then the upstream scribe probably isnt doing its job well. These tables should be almost untouched from the rds files created in the load-data chunk. Each tab should represent a different unit of analysis (e.g., a single row summarizing the completed visits for a program-month). Use all the tabs below that are appropriate for the PDSA. 
Go from biggest unit (e.g., model) to smallest unit (e.g., Provider-Week). Unnamed column qualified with {.tabset}. Model tab Program tab Program-Month tab Program-Week tab Provider-Week tab Spaghetti Annotation tab If your spaghetti plots use faint vertical lines to mark events (e.g., the start of a PDSA intervention), include the events here too. E.2.3 Graphs page The graphs plots should provide the user with a feel of the trends. One graph focuses on one measure, so ideally a max of three spaghetti plots. Ideally the change over time (for the PDSAs program) is compared to the other programs during the same period. If a PSDA has multiple Process Measures, give them separate tabs labeled Process Measure 1 &amp; Process Measure 2. Unnamed column qualified with {.tabset}. Outcome Measure tab Process Measure tab Disruptor Measure tab If a spaghetti plot depicts a proportion/percentage measure, then include a visual layer the count/denominator behind each proportion (instead of a separate spaghetti plot dedicated to the denominator). This may include one or more of the following: geom_point where presence/absence denotes a nonzero/zero denominator geom_point where size denotes denominators size. geom_text (in place of geom_point) that explicitly shows denominators size geom_text along the bottom axis that explicitly shows denominators size use spaghetti_2() located in display-1.R. (not yet developed.) Add hover text to each spaghetti. E.2.4 Marginal Graphs page The marginal histograms provide context. Single column, qualified with {.tabset}. Contains a marginal/univariate graph of all variables in the analysis. Marginal graph of outcome measure Marginal graph of process measure Marginal graph of disruptor measure Show data only from the program, not the overall model. Use histogram_2() located in display-1.R (this link is accessible only to Oklahomas MIECHV evaluation team). Add hover text to each histogram. 
If all datasets are the same unit of analysis (e.g., program-month), then dont use an H3 tab. Use (H3) tabs if you have marginals more than one level (e.g., visit date at program-month, visit date at program-week, visit date at provider-week). But avoid multiple levels, if possible; especially if program isnt fluent with a single level. histograms have a more specific y-axis. For example, Count of Months instead of Frequency E.2.5 Documentation page The documentation should be self-contained in the same html file, so its easier for the practitioner to quickly get the explanation and return to the trends. Sometimes its best to place an explanation/annotation right next to the relevant content, but other times its distracting. And its always more work to maintain the explanations if theyre spread-out across the interface. So lets try keeping almost everything under one or two tabs in the Documentation page. To help beyond that, lets try to reuse as many documentation tabs as possible. The first tab will be specific to the methodology and displays of the PDSA. The remaining tabs will reference common Rmd files; the content will automatically update when the dashboard is rendered next. Unnamed column qualified with {.tabset}. Explanation Current PDSA Explanation All CQI Dashboards Glossary Tips Config E.2.6 Miscellaneous Notes The hierarchy level in this outline indicates the HTML-heading level. Numbers are H1 (i.e., ======) that specify pages, roman numerals are H2 (i.e., ------) that specify columns, and letters are H3 (i.e., ###) that specify tabs. Cosmetics connote the type of dashboard. Specify using the theme or css yaml keywords in the Rmd header. Common measures: theme: simplex uses a red banner. 1st cycle PDSAs (i.e., initial cycle of MIECHV 3): theme: cosmo uses a blue banner. This default is used if no theme is specified. 2nd cycle PDSAs: theme: flatly uses a turquoise banner. 3rd cycle PDSAs: theme: journal uses a light red banner. 
4th cycle PDSAs (i.e., initial cycle of MIECHV 5): custom css with a purple banner (a public copy of this css is available). Instead of a theme, the below line (with four leading spaces, because the yaml entry is nested under output and flexdashboard::flex_dashboard) css: ../../common/style-cqi-cycle-4.css E.3 Architecture The dashboard is only one piece of a large workflow. The design and construction of this workflow are discussed in this book, which are highlighted below. . E.3.1 Data from External System E.3.2 Groomed Data in Warehouse E.3.3 Analysis-Ready Dataset Very little data manipulation should occur in the dashboard. The upstream scribe should produce an analysis-ready rds file. The dashboard should be concerned only with presenting the graphs, tables, summary text, and documentation. Include a common measure if the PDSA explicitly mentions it. Try to show measures only if theyre directly related to the PDSA. The PDSA dashboard will have less exposure to change (which makes it easier to maintain). If a program needs context for their measures, they can look at the common measure dashboard. "],["example-chapter.html", "F Example Chapter", " F Example Chapter This intro was copied from the 1st chapter of the example bookdown repo. Im keeping it temporarily for reference. You can label chapter and section titles using {#label} after them, e.g., we can reference the Intro Chapter. If you do not manually label them, there will be automatic labels anyway Figures and tables with captions will be placed in figure and table environments, respectively. par(mar = c(4, 4, .1, .1)) plot(pressure, type = &#39;b&#39;, pch = 19) Figure F.1: Here is a nice figure! Reference a figure by its code chunk label with the fig: prefix, e.g., see Figure F.1. Similarly, you can reference tables generated from knitr::kable(), e.g., see Table F.1. knitr::kable( head(iris, 20), caption = &#39;Here is a nice table!&#39;, booktabs = TRUE ) Table F.1: Here is a nice table! 
Sepal.Length Sepal.Width Petal.Length Petal.Width Species 5.1 3.5 1.4 0.2 setosa 4.9 3.0 1.4 0.2 setosa 4.7 3.2 1.3 0.2 setosa 4.6 3.1 1.5 0.2 setosa 5.0 3.6 1.4 0.2 setosa 5.4 3.9 1.7 0.4 setosa 4.6 3.4 1.4 0.3 setosa 5.0 3.4 1.5 0.2 setosa 4.4 2.9 1.4 0.2 setosa 4.9 3.1 1.5 0.1 setosa 5.4 3.7 1.5 0.2 setosa 4.8 3.4 1.6 0.2 setosa 4.8 3.0 1.4 0.1 setosa 4.3 3.0 1.1 0.1 setosa 5.8 4.0 1.2 0.2 setosa 5.7 4.4 1.5 0.4 setosa 5.4 3.9 1.3 0.4 setosa 5.1 3.5 1.4 0.3 setosa 5.7 3.8 1.7 0.3 setosa 5.1 3.8 1.5 0.3 setosa You can write citations, too. For example, we are using the bookdown package (Xie 2021) in this sample book, which was built on top of R Markdown and knitr (Xie 2015). H References "],["acknowledgements.html", "G Acknowledgements", " G Acknowledgements The authors thank all our colleagues for the discussions and experiences about data science that lead to this book. At OUHSC, this includes @adrose, @aggie-dbc, @ARPeters, @Ashley-Jorgensen, @athumann, @atreat1, @caston60, @chanukyalakamsani, @CWilliamsOUHSC, @DavidBard, @evoss1, @genevamarshall, @Maleeha, @man9472, @rmatkins, @sbohora, @thomasnwilson, @vimleshbavadiya, @waleboro, @YuiYamaoka, @yutiantang. Outside the OUHSC, this includes @andkov, @ben519, @cscherrer, @cmodzelewski, @jimquallen, @mhunter1, @probinso, @russelljonas, and @spopovych. `r if (knitr::is_html_output())  "],["references.html", "H References", " H References "]]
diff --git a/docs/security.html b/docs/security.html
index 8aed43a..96ee0df 100644
--- a/docs/security.html
+++ b/docs/security.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/snippets.html b/docs/snippets.html
index 62dc933..f233b3d 100644
--- a/docs/snippets.html
+++ b/docs/snippets.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -548,50 +555,50 @@ <h1>
           <div class="page-inner">
 
             <section class="normal" id="section-">
-<div id="snippets" class="section level1" number="22">
+<div id="snippets" class="section level1" number="25">
 <h1><span class="header-section-number">B</span> Snippets</h1>
-<div id="snippets-reading" class="section level2" number="22.1">
+<div id="snippets-reading" class="section level2" number="25.1">
 <h2><span class="header-section-number">B.1</span> Reading External Data</h2>
-<div id="snippets-reading-excel" class="section level3" number="22.1.1">
+<div id="snippets-reading-excel" class="section level3" number="25.1.1">
 <h3><span class="header-section-number">B.1.1</span> Reading from Excel</h3>
 <p><em>Background</em>: Avoid Excel for the <a href="%7B#data-containers-avoid">reasons previously discussed</a>. But if there isn’t another good option, be protective. <a href="https://readxl.tidyverse.org/reference/read_excel.html"><code>readxl::read_excel()</code></a> allows you to specify column types, but not column order. The names of <code>col_types</code> is ignored by <code>readxl::read_excel()</code>. To defend against roaming columns (<em>e.g.</em>, the files changed over time), <code>tesit::assert()</code> that the order is what you expect.</p>
 <p><em>Last Modified</em>: 2019-12-12 by Will</p>
-<div class="sourceCode" id="cb68"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb68-1"><a href="snippets.html#cb68-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- declare-globals ---------------------------------------------------------</span></span>
-<span id="cb68-2"><a href="snippets.html#cb68-2" aria-hidden="true" tabindex="-1"></a>config                         <span class="ot">&lt;-</span> config<span class="sc">::</span><span class="fu">get</span>()</span>
-<span id="cb68-3"><a href="snippets.html#cb68-3" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb68-4"><a href="snippets.html#cb68-4" aria-hidden="true" tabindex="-1"></a><span class="co"># cat(sprintf(&#39;  `%s`             = &quot;text&quot;,\n&#39;, colnames(ds)), sep=&quot;&quot;) # &#39;text&#39; by default --then change where appropriate.</span></span>
-<span id="cb68-5"><a href="snippets.html#cb68-5" aria-hidden="true" tabindex="-1"></a>col_types <span class="ot">&lt;-</span> <span class="fu">c</span>(</span>
-<span id="cb68-6"><a href="snippets.html#cb68-6" aria-hidden="true" tabindex="-1"></a>  <span class="st">`</span><span class="at">Med Rec Num</span><span class="st">`</span>     <span class="ot">=</span> <span class="st">&quot;text&quot;</span>,</span>
-<span id="cb68-7"><a href="snippets.html#cb68-7" aria-hidden="true" tabindex="-1"></a>  <span class="st">`</span><span class="at">Admit Date</span><span class="st">`</span>      <span class="ot">=</span> <span class="st">&quot;date&quot;</span>,</span>
-<span id="cb68-8"><a href="snippets.html#cb68-8" aria-hidden="true" tabindex="-1"></a>  <span class="st">`</span><span class="at">Tot Cash Pymt</span><span class="st">`</span>   <span class="ot">=</span> <span class="st">&quot;numeric&quot;</span></span>
-<span id="cb68-9"><a href="snippets.html#cb68-9" aria-hidden="true" tabindex="-1"></a>)</span>
-<span id="cb68-10"><a href="snippets.html#cb68-10" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb68-11"><a href="snippets.html#cb68-11" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- load-data ---------------------------------------------------------------</span></span>
-<span id="cb68-12"><a href="snippets.html#cb68-12" aria-hidden="true" tabindex="-1"></a>ds <span class="ot">&lt;-</span> readxl<span class="sc">::</span><span class="fu">read_excel</span>(</span>
-<span id="cb68-13"><a href="snippets.html#cb68-13" aria-hidden="true" tabindex="-1"></a>  <span class="at">path      =</span> config<span class="sc">$</span>path_admission_charge,</span>
-<span id="cb68-14"><a href="snippets.html#cb68-14" aria-hidden="true" tabindex="-1"></a>  <span class="at">col_types =</span> col_types</span>
-<span id="cb68-15"><a href="snippets.html#cb68-15" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sheet   = &quot;dont-use-sheets-if-possible&quot;</span></span>
-<span id="cb68-16"><a href="snippets.html#cb68-16" aria-hidden="true" tabindex="-1"></a>)</span>
-<span id="cb68-17"><a href="snippets.html#cb68-17" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb68-18"><a href="snippets.html#cb68-18" aria-hidden="true" tabindex="-1"></a>testit<span class="sc">::</span><span class="fu">assert</span>(</span>
-<span id="cb68-19"><a href="snippets.html#cb68-19" aria-hidden="true" tabindex="-1"></a>  <span class="st">&quot;The order of column names must match the expected list.&quot;</span>,</span>
-<span id="cb68-20"><a href="snippets.html#cb68-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">names</span>(col_types) <span class="sc">==</span> <span class="fu">colnames</span>(ds)</span>
-<span id="cb68-21"><a href="snippets.html#cb68-21" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
+<div class="sourceCode" id="cb69"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb69-1"><a href="snippets.html#cb69-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- declare-globals ---------------------------------------------------------</span></span>
+<span id="cb69-2"><a href="snippets.html#cb69-2" aria-hidden="true" tabindex="-1"></a>config                         <span class="ot">&lt;-</span> config<span class="sc">::</span><span class="fu">get</span>()</span>
+<span id="cb69-3"><a href="snippets.html#cb69-3" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb69-4"><a href="snippets.html#cb69-4" aria-hidden="true" tabindex="-1"></a><span class="co"># cat(sprintf(&#39;  `%s`             = &quot;text&quot;,\n&#39;, colnames(ds)), sep=&quot;&quot;) # &#39;text&#39; by default --then change where appropriate.</span></span>
+<span id="cb69-5"><a href="snippets.html#cb69-5" aria-hidden="true" tabindex="-1"></a>col_types <span class="ot">&lt;-</span> <span class="fu">c</span>(</span>
+<span id="cb69-6"><a href="snippets.html#cb69-6" aria-hidden="true" tabindex="-1"></a>  <span class="st">`</span><span class="at">Med Rec Num</span><span class="st">`</span>     <span class="ot">=</span> <span class="st">&quot;text&quot;</span>,</span>
+<span id="cb69-7"><a href="snippets.html#cb69-7" aria-hidden="true" tabindex="-1"></a>  <span class="st">`</span><span class="at">Admit Date</span><span class="st">`</span>      <span class="ot">=</span> <span class="st">&quot;date&quot;</span>,</span>
+<span id="cb69-8"><a href="snippets.html#cb69-8" aria-hidden="true" tabindex="-1"></a>  <span class="st">`</span><span class="at">Tot Cash Pymt</span><span class="st">`</span>   <span class="ot">=</span> <span class="st">&quot;numeric&quot;</span></span>
+<span id="cb69-9"><a href="snippets.html#cb69-9" aria-hidden="true" tabindex="-1"></a>)</span>
+<span id="cb69-10"><a href="snippets.html#cb69-10" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb69-11"><a href="snippets.html#cb69-11" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- load-data ---------------------------------------------------------------</span></span>
+<span id="cb69-12"><a href="snippets.html#cb69-12" aria-hidden="true" tabindex="-1"></a>ds <span class="ot">&lt;-</span> readxl<span class="sc">::</span><span class="fu">read_excel</span>(</span>
+<span id="cb69-13"><a href="snippets.html#cb69-13" aria-hidden="true" tabindex="-1"></a>  <span class="at">path      =</span> config<span class="sc">$</span>path_admission_charge,</span>
+<span id="cb69-14"><a href="snippets.html#cb69-14" aria-hidden="true" tabindex="-1"></a>  <span class="at">col_types =</span> col_types</span>
+<span id="cb69-15"><a href="snippets.html#cb69-15" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sheet   = &quot;dont-use-sheets-if-possible&quot;</span></span>
+<span id="cb69-16"><a href="snippets.html#cb69-16" aria-hidden="true" tabindex="-1"></a>)</span>
+<span id="cb69-17"><a href="snippets.html#cb69-17" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb69-18"><a href="snippets.html#cb69-18" aria-hidden="true" tabindex="-1"></a>testit<span class="sc">::</span><span class="fu">assert</span>(</span>
+<span id="cb69-19"><a href="snippets.html#cb69-19" aria-hidden="true" tabindex="-1"></a>  <span class="st">&quot;The order of column names must match the expected list.&quot;</span>,</span>
+<span id="cb69-20"><a href="snippets.html#cb69-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">names</span>(col_types) <span class="sc">==</span> <span class="fu">colnames</span>(ds)</span>
+<span id="cb69-21"><a href="snippets.html#cb69-21" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
 </div>
-<div id="snippets-reading-trailing-comma" class="section level3" number="22.1.2">
+<div id="snippets-reading-trailing-comma" class="section level3" number="25.1.2">
 <h3><span class="header-section-number">B.1.2</span> Removing Trailing Comma from Header</h3>
 <p><em>Background</em>: Occasionally a Meditech Extract will have an extra comma at the end of the 1st line. For each subsequent line, <a href="https://readr.tidyverse.org/reference/read_delim.html"><code>readr:read_csv()</code></a> appropriately throws a new warning that it is missing a column. This warning flood can mask real problems.</p>
 <p><em>Explanation</em>: This snippet (a) reads the csv as plain text, (b) removes the final comma, and (c) passes the plain text to <code>readr::read_csv()</code> to convert it into a data.frame.</p>
 <p><em>Instruction</em>: Modify <code>Dx50 Name</code> to the name of the final (real) column.</p>
 <p><em>Real Example</em>: <a href="https://github.com/OuhscBbmc/truong-pharmacist-transition-1/blob/eec6d7eb8aaa9e3df52dafb826dbc53aaf515c63/manipulation/ellis/dx-ellis.R#L158-L162">truong-pharmacist-transition-1</a> (Accessible to only CDW members.)</p>
 <p><em>Last Modified</em>: 2019-12-12 by Will</p>
-<div class="sourceCode" id="cb69"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb69-1"><a href="snippets.html#cb69-1" aria-hidden="true" tabindex="-1"></a><span class="co"># The next two lines remove the trailing comma at the end of the 1st line.</span></span>
-<span id="cb69-2"><a href="snippets.html#cb69-2" aria-hidden="true" tabindex="-1"></a>raw_text  <span class="ot">&lt;-</span> readr<span class="sc">::</span><span class="fu">read_file</span>(path_in)</span>
-<span id="cb69-3"><a href="snippets.html#cb69-3" aria-hidden="true" tabindex="-1"></a>raw_text  <span class="ot">&lt;-</span> <span class="fu">sub</span>(<span class="st">&quot;^(.+Dx50 Name),&quot;</span>, <span class="st">&quot;</span><span class="sc">\\</span><span class="st">1&quot;</span>, raw_text)</span>
-<span id="cb69-4"><a href="snippets.html#cb69-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb69-5"><a href="snippets.html#cb69-5" aria-hidden="true" tabindex="-1"></a>ds        <span class="ot">&lt;-</span> readr<span class="sc">::</span><span class="fu">read_csv</span>(raw_text, <span class="at">col_types=</span>col_types)</span></code></pre></div>
+<div class="sourceCode" id="cb70"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb70-1"><a href="snippets.html#cb70-1" aria-hidden="true" tabindex="-1"></a><span class="co"># The next two lines remove the trailing comma at the end of the 1st line.</span></span>
+<span id="cb70-2"><a href="snippets.html#cb70-2" aria-hidden="true" tabindex="-1"></a>raw_text  <span class="ot">&lt;-</span> readr<span class="sc">::</span><span class="fu">read_file</span>(path_in)</span>
+<span id="cb70-3"><a href="snippets.html#cb70-3" aria-hidden="true" tabindex="-1"></a>raw_text  <span class="ot">&lt;-</span> <span class="fu">sub</span>(<span class="st">&quot;^(.+Dx50 Name),&quot;</span>, <span class="st">&quot;</span><span class="sc">\\</span><span class="st">1&quot;</span>, raw_text)</span>
+<span id="cb70-4"><a href="snippets.html#cb70-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb70-5"><a href="snippets.html#cb70-5" aria-hidden="true" tabindex="-1"></a>ds        <span class="ot">&lt;-</span> readr<span class="sc">::</span><span class="fu">read_csv</span>(raw_text, <span class="at">col_types=</span>col_types)</span></code></pre></div>
 </div>
-<div id="snippets-reading-vroom" class="section level3" number="22.1.3">
+<div id="snippets-reading-vroom" class="section level3" number="25.1.3">
 <h3><span class="header-section-number">B.1.3</span> Removing Trailing Comma from Header</h3>
 <p><em>Background</em>: When incoming data files are on the large side to comfortably accept with <a href="https://readr.tidyverse.org/">readr</a>, we use <a href="https://vroom.r-lib.org/">vroom</a>. The two packages are develoepd by the same group and <a href="https://github.com/tidyverse/tidyverse.org/pull/375#issuecomment-564781603">might be combined</a> in the future.</p>
 <p><em>Explanation</em>: This snippet defines the <code>col_types</code> list with names to mimic <a href="https://ouhscbbmc.github.io/data-science-practices-1/file-prototype-r.html#chunk-declare">our approach</a> of using readr. There are some small differences with our readr approach:
@@ -600,78 +607,78 @@ <h3><span class="header-section-number">B.1.3</span> Removing Trailing Comma fro
 1. If the data file contains columns we don’t need, we define them in <code>col_types</code> anyway; vroom needs to know the file structure if it’s missing a header row.</p>
 <p><em>Real Example</em>: <a href="https://github.com/OuhscBbmc/akande-medically-complex-1/tree/master/manipulation/ohca">akande-medically-complex-1</a> (Accessible to only CDW members.) These files did not have a header of variable names; the first line of the file is the first data row.</p>
 <p><em>Last Modified</em>: 2020-08-21 by Will</p>
-<div class="sourceCode" id="cb70"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb70-1"><a href="snippets.html#cb70-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- declare-globals ---------------------------------------------------------</span></span>
-<span id="cb70-2"><a href="snippets.html#cb70-2" aria-hidden="true" tabindex="-1"></a>config            <span class="ot">&lt;-</span> config<span class="sc">::</span><span class="fu">get</span>()</span>
-<span id="cb70-3"><a href="snippets.html#cb70-3" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb70-4"><a href="snippets.html#cb70-4" aria-hidden="true" tabindex="-1"></a>col_types <span class="ot">&lt;-</span> <span class="fu">list</span>(</span>
-<span id="cb70-5"><a href="snippets.html#cb70-5" aria-hidden="true" tabindex="-1"></a>  <span class="at">sak                      =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),  <span class="co"># &quot;system-assigned key&quot;</span></span>
-<span id="cb70-6"><a href="snippets.html#cb70-6" aria-hidden="true" tabindex="-1"></a>  <span class="at">aid_category_id          =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
-<span id="cb70-7"><a href="snippets.html#cb70-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">age                      =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
-<span id="cb70-8"><a href="snippets.html#cb70-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">service_date_first       =</span> vroom<span class="sc">::</span><span class="fu">col_date</span>(<span class="st">&quot;%m/%d/%Y&quot;</span>),</span>
-<span id="cb70-9"><a href="snippets.html#cb70-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">service_date_lasst       =</span> vroom<span class="sc">::</span><span class="fu">col_date</span>(<span class="st">&quot;%m/%d/%Y&quot;</span>),</span>
-<span id="cb70-10"><a href="snippets.html#cb70-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">claim_type               =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
-<span id="cb70-11"><a href="snippets.html#cb70-11" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_id              =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
-<span id="cb70-12"><a href="snippets.html#cb70-12" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_lat             =</span> vroom<span class="sc">::</span><span class="fu">col_double</span>(),</span>
-<span id="cb70-13"><a href="snippets.html#cb70-13" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_long            =</span> vroom<span class="sc">::</span><span class="fu">col_double</span>(),</span>
-<span id="cb70-14"><a href="snippets.html#cb70-14" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_zip             =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
-<span id="cb70-15"><a href="snippets.html#cb70-15" aria-hidden="true" tabindex="-1"></a>  <span class="at">cpt                      =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
-<span id="cb70-16"><a href="snippets.html#cb70-16" aria-hidden="true" tabindex="-1"></a>  <span class="at">revenue_code             =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
-<span id="cb70-17"><a href="snippets.html#cb70-17" aria-hidden="true" tabindex="-1"></a>  <span class="at">icd_code                 =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
-<span id="cb70-18"><a href="snippets.html#cb70-18" aria-hidden="true" tabindex="-1"></a>  <span class="at">icd_sequence             =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
-<span id="cb70-19"><a href="snippets.html#cb70-19" aria-hidden="true" tabindex="-1"></a>  <span class="at">vocabulary_coarse_id     =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>()</span>
-<span id="cb70-20"><a href="snippets.html#cb70-20" aria-hidden="true" tabindex="-1"></a>)</span>
-<span id="cb70-21"><a href="snippets.html#cb70-21" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb70-22"><a href="snippets.html#cb70-22" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- load-data ---------------------------------------------------------------</span></span>
-<span id="cb70-23"><a href="snippets.html#cb70-23" aria-hidden="true" tabindex="-1"></a>ds <span class="ot">&lt;-</span> vroom<span class="sc">::</span><span class="fu">vroom</span>(</span>
-<span id="cb70-24"><a href="snippets.html#cb70-24" aria-hidden="true" tabindex="-1"></a>  <span class="at">file      =</span> config<span class="sc">$</span>path_ohca_patient,</span>
-<span id="cb70-25"><a href="snippets.html#cb70-25" aria-hidden="true" tabindex="-1"></a>  <span class="at">delim     =</span> <span class="st">&quot;</span><span class="sc">\t</span><span class="st">&quot;</span>,</span>
-<span id="cb70-26"><a href="snippets.html#cb70-26" aria-hidden="true" tabindex="-1"></a>  <span class="at">col_names =</span> <span class="fu">names</span>(col_types),</span>
-<span id="cb70-27"><a href="snippets.html#cb70-27" aria-hidden="true" tabindex="-1"></a>  <span class="at">col_types =</span> col_types</span>
-<span id="cb70-28"><a href="snippets.html#cb70-28" aria-hidden="true" tabindex="-1"></a>)</span>
-<span id="cb70-29"><a href="snippets.html#cb70-29" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb70-30"><a href="snippets.html#cb70-30" aria-hidden="true" tabindex="-1"></a><span class="fu">rm</span>(col_types)</span></code></pre></div>
+<div class="sourceCode" id="cb71"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb71-1"><a href="snippets.html#cb71-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- declare-globals ---------------------------------------------------------</span></span>
+<span id="cb71-2"><a href="snippets.html#cb71-2" aria-hidden="true" tabindex="-1"></a>config            <span class="ot">&lt;-</span> config<span class="sc">::</span><span class="fu">get</span>()</span>
+<span id="cb71-3"><a href="snippets.html#cb71-3" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb71-4"><a href="snippets.html#cb71-4" aria-hidden="true" tabindex="-1"></a>col_types <span class="ot">&lt;-</span> <span class="fu">list</span>(</span>
+<span id="cb71-5"><a href="snippets.html#cb71-5" aria-hidden="true" tabindex="-1"></a>  <span class="at">sak                      =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),  <span class="co"># &quot;system-assigned key&quot;</span></span>
+<span id="cb71-6"><a href="snippets.html#cb71-6" aria-hidden="true" tabindex="-1"></a>  <span class="at">aid_category_id          =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
+<span id="cb71-7"><a href="snippets.html#cb71-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">age                      =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
+<span id="cb71-8"><a href="snippets.html#cb71-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">service_date_first       =</span> vroom<span class="sc">::</span><span class="fu">col_date</span>(<span class="st">&quot;%m/%d/%Y&quot;</span>),</span>
+<span id="cb71-9"><a href="snippets.html#cb71-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">service_date_lasst       =</span> vroom<span class="sc">::</span><span class="fu">col_date</span>(<span class="st">&quot;%m/%d/%Y&quot;</span>),</span>
+<span id="cb71-10"><a href="snippets.html#cb71-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">claim_type               =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
+<span id="cb71-11"><a href="snippets.html#cb71-11" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_id              =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
+<span id="cb71-12"><a href="snippets.html#cb71-12" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_lat             =</span> vroom<span class="sc">::</span><span class="fu">col_double</span>(),</span>
+<span id="cb71-13"><a href="snippets.html#cb71-13" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_long            =</span> vroom<span class="sc">::</span><span class="fu">col_double</span>(),</span>
+<span id="cb71-14"><a href="snippets.html#cb71-14" aria-hidden="true" tabindex="-1"></a>  <span class="at">provider_zip             =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
+<span id="cb71-15"><a href="snippets.html#cb71-15" aria-hidden="true" tabindex="-1"></a>  <span class="at">cpt                      =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
+<span id="cb71-16"><a href="snippets.html#cb71-16" aria-hidden="true" tabindex="-1"></a>  <span class="at">revenue_code             =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
+<span id="cb71-17"><a href="snippets.html#cb71-17" aria-hidden="true" tabindex="-1"></a>  <span class="at">icd_code                 =</span> vroom<span class="sc">::</span><span class="fu">col_character</span>(),</span>
+<span id="cb71-18"><a href="snippets.html#cb71-18" aria-hidden="true" tabindex="-1"></a>  <span class="at">icd_sequence             =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>(),</span>
+<span id="cb71-19"><a href="snippets.html#cb71-19" aria-hidden="true" tabindex="-1"></a>  <span class="at">vocabulary_coarse_id     =</span> vroom<span class="sc">::</span><span class="fu">col_integer</span>()</span>
+<span id="cb71-20"><a href="snippets.html#cb71-20" aria-hidden="true" tabindex="-1"></a>)</span>
+<span id="cb71-21"><a href="snippets.html#cb71-21" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb71-22"><a href="snippets.html#cb71-22" aria-hidden="true" tabindex="-1"></a><span class="co"># ---- load-data ---------------------------------------------------------------</span></span>
+<span id="cb71-23"><a href="snippets.html#cb71-23" aria-hidden="true" tabindex="-1"></a>ds <span class="ot">&lt;-</span> vroom<span class="sc">::</span><span class="fu">vroom</span>(</span>
+<span id="cb71-24"><a href="snippets.html#cb71-24" aria-hidden="true" tabindex="-1"></a>  <span class="at">file      =</span> config<span class="sc">$</span>path_ohca_patient,</span>
+<span id="cb71-25"><a href="snippets.html#cb71-25" aria-hidden="true" tabindex="-1"></a>  <span class="at">delim     =</span> <span class="st">&quot;</span><span class="sc">\t</span><span class="st">&quot;</span>,</span>
+<span id="cb71-26"><a href="snippets.html#cb71-26" aria-hidden="true" tabindex="-1"></a>  <span class="at">col_names =</span> <span class="fu">names</span>(col_types),</span>
+<span id="cb71-27"><a href="snippets.html#cb71-27" aria-hidden="true" tabindex="-1"></a>  <span class="at">col_types =</span> col_types</span>
+<span id="cb71-28"><a href="snippets.html#cb71-28" aria-hidden="true" tabindex="-1"></a>)</span>
+<span id="cb71-29"><a href="snippets.html#cb71-29" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb71-30"><a href="snippets.html#cb71-30" aria-hidden="true" tabindex="-1"></a><span class="fu">rm</span>(col_types)</span></code></pre></div>
 </div>
 </div>
-<div id="snippets-grooming" class="section level2" number="22.2">
+<div id="snippets-grooming" class="section level2" number="25.2">
 <h2><span class="header-section-number">B.2</span> Grooming</h2>
-<div id="snippets-grooming-two-year" class="section level3" number="22.2.1">
+<div id="snippets-grooming-two-year" class="section level3" number="25.2.1">
 <h3><span class="header-section-number">B.2.1</span> Correct for misinterpreted two-digit year</h3>
 <p><em>Background</em>: Sometimes the Meditech dates are specified like <code>1/6/54</code> instead of <code>1/6/1954</code>. <code>readr::read_csv()</code> has to choose if the year is supposed to be ‘1954’ or ‘2054.’ A human can use context to guess a birth date is in the past (so it guesses 1954), but readr can’t (so it guesses 2054). To avoid this and other problems, request dates in an <a href="https://www.explainxkcd.com/wiki/index.php/1179:_ISO_8601">ISO-8601</a> format.</p>
 <p><em>Explanation</em>: Correct for this in a <code>dplyr::mutate()</code> clause; compare the date value against today. If the date is today or before, use it; if the day is in the future, subtract 100 years.</p>
 <p><em>Instruction</em>: For future dates such as loan payments, the direction will flip.</p>
 <p><em>Last Modified</em>: 2019-12-12 by Will</p>
-<div class="sourceCode" id="cb71"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb71-1"><a href="snippets.html#cb71-1" aria-hidden="true" tabindex="-1"></a> ds <span class="sc">%&gt;%</span></span>
-<span id="cb71-2"><a href="snippets.html#cb71-2" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">mutate</span>(</span>
-<span id="cb71-3"><a href="snippets.html#cb71-3" aria-hidden="true" tabindex="-1"></a>    <span class="at">dob =</span> dplyr<span class="sc">::</span><span class="fu">if_else</span>(dob <span class="sc">&lt;=</span> <span class="fu">Sys.Date</span>(), dob, dob <span class="sc">-</span> lubridate<span class="sc">::</span><span class="fu">years</span>(<span class="dv">100</span>))</span>
-<span id="cb71-4"><a href="snippets.html#cb71-4" aria-hidden="true" tabindex="-1"></a>  )</span></code></pre></div>
+<div class="sourceCode" id="cb72"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb72-1"><a href="snippets.html#cb72-1" aria-hidden="true" tabindex="-1"></a> ds <span class="sc">%&gt;%</span></span>
+<span id="cb72-2"><a href="snippets.html#cb72-2" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">mutate</span>(</span>
+<span id="cb72-3"><a href="snippets.html#cb72-3" aria-hidden="true" tabindex="-1"></a>    <span class="at">dob =</span> dplyr<span class="sc">::</span><span class="fu">if_else</span>(dob <span class="sc">&lt;=</span> <span class="fu">Sys.Date</span>(), dob, dob <span class="sc">-</span> lubridate<span class="sc">::</span><span class="fu">years</span>(<span class="dv">100</span>))</span>
+<span id="cb72-4"><a href="snippets.html#cb72-4" aria-hidden="true" tabindex="-1"></a>  )</span></code></pre></div>
 </div>
 </div>
-<div id="snippets-identification" class="section level2" number="22.3">
+<div id="snippets-identification" class="section level2" number="25.3">
 <h2><span class="header-section-number">B.3</span> Identification</h2>
-<div id="snippets-identification-tags" class="section level3" number="22.3.1">
+<div id="snippets-identification-tags" class="section level3" number="25.3.1">
 <h3><span class="header-section-number">B.3.1</span> Generating “tags”</h3>
 <p><em>Background</em>: When you need to generate unique identification values for future people/clients/patients, as described in the <a href="style.html#style-number">style guide</a>.</p>
 <p><em>Explanation</em>: This snippet will create a 5-row csv with random 7-character “tags” to send to the research team collecting patients.</p>
 <p><em>Instruction</em>: Set <code>pt_count</code>, <code>tag_length</code>, <code>path_out</code>, and execute. Add and rename the columns to be more appropriate for your domain (<em>e.g.</em>, change “patient tag” to “store tag”).</p>
 <p><em>Last Modified</em>: 2019-12-30 by Will</p>
-<div class="sourceCode" id="cb72"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb72-1"><a href="snippets.html#cb72-1" aria-hidden="true" tabindex="-1"></a>pt_count    <span class="ot">&lt;-</span> 5L   <span class="co"># The number of rows in the dataset.</span></span>
-<span id="cb72-2"><a href="snippets.html#cb72-2" aria-hidden="true" tabindex="-1"></a>tag_length  <span class="ot">&lt;-</span> 7L   <span class="co"># The number of characters in each tag.</span></span>
-<span id="cb72-3"><a href="snippets.html#cb72-3" aria-hidden="true" tabindex="-1"></a>path_out    <span class="ot">&lt;-</span> <span class="st">&quot;data-private/derived/pt-pool.csv&quot;</span></span>
-<span id="cb72-4"><a href="snippets.html#cb72-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb72-5"><a href="snippets.html#cb72-5" aria-hidden="true" tabindex="-1"></a>draw_tag <span class="ot">&lt;-</span> <span class="cf">function</span> (<span class="at">tag_length =</span> 4L, <span class="at">urn =</span> <span class="fu">c</span>(<span class="dv">0</span><span class="sc">:</span><span class="dv">9</span>, letters)) {</span>
-<span id="cb72-6"><a href="snippets.html#cb72-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">paste</span>(<span class="fu">sample</span>(urn, <span class="at">size =</span> tag_length, <span class="at">replace =</span> T), <span class="at">collapse =</span> <span class="st">&quot;&quot;</span>)</span>
-<span id="cb72-7"><a href="snippets.html#cb72-7" aria-hidden="true" tabindex="-1"></a>}</span>
-<span id="cb72-8"><a href="snippets.html#cb72-8" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb72-9"><a href="snippets.html#cb72-9" aria-hidden="true" tabindex="-1"></a>ds_pt_pool <span class="ot">&lt;-</span></span>
-<span id="cb72-10"><a href="snippets.html#cb72-10" aria-hidden="true" tabindex="-1"></a>  tibble<span class="sc">::</span><span class="fu">tibble</span>(</span>
-<span id="cb72-11"><a href="snippets.html#cb72-11" aria-hidden="true" tabindex="-1"></a>    <span class="at">pt_index    =</span> <span class="fu">seq_len</span>(pt_count),</span>
-<span id="cb72-12"><a href="snippets.html#cb72-12" aria-hidden="true" tabindex="-1"></a>    <span class="at">pt_tag      =</span> <span class="fu">vapply</span>(<span class="fu">rep</span>(tag_length, pt_count), draw_tag, <span class="fu">character</span>(<span class="dv">1</span>)),</span>
-<span id="cb72-13"><a href="snippets.html#cb72-13" aria-hidden="true" tabindex="-1"></a>    <span class="at">assigned    =</span> <span class="cn">FALSE</span>,</span>
-<span id="cb72-14"><a href="snippets.html#cb72-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">name_last   =</span> <span class="st">&quot;--&quot;</span>,</span>
-<span id="cb72-15"><a href="snippets.html#cb72-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">name_first  =</span> <span class="st">&quot;--&quot;</span></span>
-<span id="cb72-16"><a href="snippets.html#cb72-16" aria-hidden="true" tabindex="-1"></a>  )</span>
-<span id="cb72-17"><a href="snippets.html#cb72-17" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb72-18"><a href="snippets.html#cb72-18" aria-hidden="true" tabindex="-1"></a>readr<span class="sc">::</span><span class="fu">write_csv</span>(ds_pt_pool, path_out)</span></code></pre></div>
+<div class="sourceCode" id="cb73"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb73-1"><a href="snippets.html#cb73-1" aria-hidden="true" tabindex="-1"></a>pt_count    <span class="ot">&lt;-</span> 5L   <span class="co"># The number of rows in the dataset.</span></span>
+<span id="cb73-2"><a href="snippets.html#cb73-2" aria-hidden="true" tabindex="-1"></a>tag_length  <span class="ot">&lt;-</span> 7L   <span class="co"># The number of characters in each tag.</span></span>
+<span id="cb73-3"><a href="snippets.html#cb73-3" aria-hidden="true" tabindex="-1"></a>path_out    <span class="ot">&lt;-</span> <span class="st">&quot;data-private/derived/pt-pool.csv&quot;</span></span>
+<span id="cb73-4"><a href="snippets.html#cb73-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb73-5"><a href="snippets.html#cb73-5" aria-hidden="true" tabindex="-1"></a>draw_tag <span class="ot">&lt;-</span> <span class="cf">function</span> (<span class="at">tag_length =</span> 4L, <span class="at">urn =</span> <span class="fu">c</span>(<span class="dv">0</span><span class="sc">:</span><span class="dv">9</span>, letters)) {</span>
+<span id="cb73-6"><a href="snippets.html#cb73-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">paste</span>(<span class="fu">sample</span>(urn, <span class="at">size =</span> tag_length, <span class="at">replace =</span> T), <span class="at">collapse =</span> <span class="st">&quot;&quot;</span>)</span>
+<span id="cb73-7"><a href="snippets.html#cb73-7" aria-hidden="true" tabindex="-1"></a>}</span>
+<span id="cb73-8"><a href="snippets.html#cb73-8" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb73-9"><a href="snippets.html#cb73-9" aria-hidden="true" tabindex="-1"></a>ds_pt_pool <span class="ot">&lt;-</span></span>
+<span id="cb73-10"><a href="snippets.html#cb73-10" aria-hidden="true" tabindex="-1"></a>  tibble<span class="sc">::</span><span class="fu">tibble</span>(</span>
+<span id="cb73-11"><a href="snippets.html#cb73-11" aria-hidden="true" tabindex="-1"></a>    <span class="at">pt_index    =</span> <span class="fu">seq_len</span>(pt_count),</span>
+<span id="cb73-12"><a href="snippets.html#cb73-12" aria-hidden="true" tabindex="-1"></a>    <span class="at">pt_tag      =</span> <span class="fu">vapply</span>(<span class="fu">rep</span>(tag_length, pt_count), draw_tag, <span class="fu">character</span>(<span class="dv">1</span>)),</span>
+<span id="cb73-13"><a href="snippets.html#cb73-13" aria-hidden="true" tabindex="-1"></a>    <span class="at">assigned    =</span> <span class="cn">FALSE</span>,</span>
+<span id="cb73-14"><a href="snippets.html#cb73-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">name_last   =</span> <span class="st">&quot;--&quot;</span>,</span>
+<span id="cb73-15"><a href="snippets.html#cb73-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">name_first  =</span> <span class="st">&quot;--&quot;</span></span>
+<span id="cb73-16"><a href="snippets.html#cb73-16" aria-hidden="true" tabindex="-1"></a>  )</span>
+<span id="cb73-17"><a href="snippets.html#cb73-17" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb73-18"><a href="snippets.html#cb73-18" aria-hidden="true" tabindex="-1"></a>readr<span class="sc">::</span><span class="fu">write_csv</span>(ds_pt_pool, path_out)</span></code></pre></div>
 <p>The resulting dataset will look like this, but with different randomly-generated tags.</p>
 <pre class="csv"><code># A tibble: 5 x 5
   pt_index pt_tag  assigned name_last name_first
@@ -683,9 +690,9 @@ <h3><span class="header-section-number">B.3.1</span> Generating “tags”</h3>
 5        5 r5ei5ph FALSE    --        --</code></pre>
 </div>
 </div>
-<div id="snippets-correspondence" class="section level2" number="22.4">
+<div id="snippets-correspondence" class="section level2" number="25.4">
 <h2><span class="header-section-number">B.4</span> Correspondence with Collaborators</h2>
-<div id="snippets-correspondence-excel" class="section level3" number="22.4.1">
+<div id="snippets-correspondence-excel" class="section level3" number="25.4.1">
 <h3><span class="header-section-number">B.4.1</span> Excel files</h3>
 <p>Receiving and storing <a href="rest.html#data-containers-avoid">Excel files should almost always be avoided</a> for the reasons explained in this letter.</p>
 <p>We receive extracts as Excel files frequently, and have the following request ready to email the person sending us Excel files. Adapt the bold values like “109.19” to your situation. If you are familiar with their tools, suggest an alternative for saving the file as a csv. Once presented with these Excel gotchas, almost everyone has an ‘aha’ moment and recognizes the problem. Unfortunately, not everyone has flexible software and can adapt easily.</p>
diff --git a/docs/style.html b/docs/style.html
index 0584f67..7973212 100644
--- a/docs/style.html
+++ b/docs/style.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/team.html b/docs/team.html
index 9538d0e..a9fd43f 100644
--- a/docs/team.html
+++ b/docs/team.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -32,7 +32,7 @@
   
   
 <link rel="prev" href="tools.html"/>
-<link rel="next" href="git.html"/>
+<link rel="next" href="redcap-user.html"/>
 <script src="libs/header-attrs-2.8/header-attrs.js"></script>
 <script src="libs/jquery-2.2.3/jquery.min.js"></script>
 <link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -571,16 +578,13 @@ <h2><span class="header-section-number">20.3</span> Bridges Outside the Team</h2
 
 </div>
 </div>
-
-
-
             </section>
 
           </div>
         </div>
       </div>
 <a href="tools.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
-<a href="git.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
+<a href="redcap-user.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
     </div>
   </div>
 <script src="libs/gitbook-2.6.7/js/app.min.js"></script>
diff --git a/docs/testing-and-validation.html b/docs/testing-and-validation.html
index edc1059..13a8faf 100644
--- a/docs/testing-and-validation.html
+++ b/docs/testing-and-validation.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/tools.html b/docs/tools.html
index 031b648..31d0bcd 100644
--- a/docs/tools.html
+++ b/docs/tools.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/troubleshooting.html b/docs/troubleshooting.html
index e0702fb..a469df3 100644
--- a/docs/troubleshooting.html
+++ b/docs/troubleshooting.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
diff --git a/docs/workstation.html b/docs/workstation.html
index b6e1226..cb44835 100644
--- a/docs/workstation.html
+++ b/docs/workstation.html
@@ -24,7 +24,7 @@
 <meta name="author" content="Will Beasley" />
 
 
-<meta name="date" content="2021-05-20" />
+<meta name="date" content="2021-06-08" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <meta name="apple-mobile-web-app-capable" content="yes" />
@@ -456,6 +456,13 @@
 <li class="chapter" data-level="20.2" data-path="team.html"><a href="team.html#training-to-data-science"><i class="fa fa-check"></i><b>20.2</b> Training to Data Science</a></li>
 <li class="chapter" data-level="20.3" data-path="team.html"><a href="team.html#bridges-outside-the-team"><i class="fa fa-check"></i><b>20.3</b> Bridges Outside the Team</a></li>
 </ul></li>
+<li class="chapter" data-level="21" data-path="redcap-user.html"><a href="redcap-user.html"><i class="fa fa-check"></i><b>21</b> Material for REDCap Users</a>
+<ul>
+<li class="chapter" data-level="21.1" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-login"><i class="fa fa-check"></i><b>21.1</b> Login</a></li>
+<li class="chapter" data-level="21.2" data-path="redcap-user.html"><a href="redcap-user.html#redcap-user-report-develop"><i class="fa fa-check"></i><b>21.2</b> Developing Reports</a></li>
+</ul></li>
+<li class="chapter" data-level="22" data-path="redcap-developer.html"><a href="redcap-developer.html"><i class="fa fa-check"></i><b>22</b> Material for REDCap Developers</a></li>
+<li class="chapter" data-level="23" data-path="redcap-admin.html"><a href="redcap-admin.html"><i class="fa fa-check"></i><b>23</b> Material for REDCap Admins</a></li>
 <li class="appendix"><span><b>Appendix</b></span></li>
 <li class="chapter" data-level="A" data-path="git.html"><a href="git.html"><i class="fa fa-check"></i><b>A</b> Git &amp; GitHub</a>
 <ul>
@@ -619,6 +626,17 @@ <h3><span class="header-section-number">18.2.3</span> Azure Data Studio</h3>
 <li>Data | Sql | <strong>Show Connection Info In Title: uncheck</strong> {<code>"sql.showConnectionInfoInTitle": false</code>}</li>
 <li>Data | Sql | <strong>Copy Include Headers: check</strong> {<code>"sql.copyIncludeHeaders": true</code>}</li>
 </ol>
+<div class="sourceCode" id="cb60"><pre class="sourceCode json"><code class="sourceCode json"><span id="cb60-1"><a href="workstation.html#cb60-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
+<span id="cb60-2"><a href="workstation.html#cb60-2" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;workbench.enablePreviewFeatures&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
+<span id="cb60-3"><a href="workstation.html#cb60-3" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;workbench.colorTheme&quot;</span><span class="fu">:</span> <span class="st">&quot;Default Dark Azure Data Studio&quot;</span><span class="fu">,</span></span>
+<span id="cb60-4"><a href="workstation.html#cb60-4" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;editor.tabSize&quot;</span><span class="fu">:</span> <span class="dv">2</span><span class="fu">,</span></span>
+<span id="cb60-5"><a href="workstation.html#cb60-5" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;editor.detectIndentation&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
+<span id="cb60-6"><a href="workstation.html#cb60-6" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.insertFinalNewline&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
+<span id="cb60-7"><a href="workstation.html#cb60-7" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.trimFinalNewlines&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
+<span id="cb60-8"><a href="workstation.html#cb60-8" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.trimTrailingWhitespace&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
+<span id="cb60-9"><a href="workstation.html#cb60-9" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;queryEditor.showConnectionInfoInTitle&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
+<span id="cb60-10"><a href="workstation.html#cb60-10" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;queryEditor.results.copyIncludeHeaders&quot;</span><span class="fu">:</span> <span class="kw">true</span></span>
+<span id="cb60-11"><a href="workstation.html#cb60-11" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div>
 </div>
 <div id="workstation-vscode" class="section level3" number="18.2.4">
 <h3><span class="header-section-number">18.2.4</span> Visual Studio Code</h3>
@@ -633,48 +651,48 @@ <h3><span class="header-section-number">18.2.4</span> Visual Studio Code</h3>
 <li><p><a href="https://marketplace.visualstudio.com/items?itemName=DavidAnson.vscode-markdownlint">markdownlint</a> has linting and style checking.</p></li>
 </ul>
 <p>These extensions <a href="https://code.visualstudio.com/docs/editor/command-line#_working-with-extensions">can be installed by command line</a>.</p>
-<div class="sourceCode" id="cb60"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb60-1"><a href="workstation.html#cb60-1" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--list-extensions</span></span>
-<span id="cb60-2"><a href="workstation.html#cb60-2" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> GrapeCity.gc-excelviewer</span>
-<span id="cb60-3"><a href="workstation.html#cb60-3" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> mechatroner.rainbow-csv</span>
-<span id="cb60-4"><a href="workstation.html#cb60-4" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> ms-mssql.mssql</span>
-<span id="cb60-5"><a href="workstation.html#cb60-5" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> streetsidesoftware.code-spell-checker</span>
-<span id="cb60-6"><a href="workstation.html#cb60-6" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> yzhang.markdown-all-in-one</span>
-<span id="cb60-7"><a href="workstation.html#cb60-7" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> yzane.markdown-pdf</span>
-<span id="cb60-8"><a href="workstation.html#cb60-8" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> DavidAnson.vscode-markdownlint</span></code></pre></div>
+<div class="sourceCode" id="cb61"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb61-1"><a href="workstation.html#cb61-1" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--list-extensions</span></span>
+<span id="cb61-2"><a href="workstation.html#cb61-2" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> GrapeCity.gc-excelviewer</span>
+<span id="cb61-3"><a href="workstation.html#cb61-3" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> mechatroner.rainbow-csv</span>
+<span id="cb61-4"><a href="workstation.html#cb61-4" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> ms-mssql.mssql</span>
+<span id="cb61-5"><a href="workstation.html#cb61-5" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> streetsidesoftware.code-spell-checker</span>
+<span id="cb61-6"><a href="workstation.html#cb61-6" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> yzhang.markdown-all-in-one</span>
+<span id="cb61-7"><a href="workstation.html#cb61-7" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> yzane.markdown-pdf</span>
+<span id="cb61-8"><a href="workstation.html#cb61-8" aria-hidden="true" tabindex="-1"></a><span class="ex">code</span> <span class="at">--install-extension</span> DavidAnson.vscode-markdownlint</span></code></pre></div>
 <p>Note: here are some non-default changes that facilitate our workflow. Either copy this configuration into <a href="https://code.visualstudio.com/docs/getstarted/tips-and-tricks#_tune-your-settings"><code>settings.json</code></a>, or manually specify the options with the <a href="https://code.visualstudio.com/docs/getstarted/settings">settings editor</a>.</p>
-<div class="sourceCode" id="cb61"><pre class="sourceCode json"><code class="sourceCode json"><span id="cb61-1"><a href="workstation.html#cb61-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
-<span id="cb61-2"><a href="workstation.html#cb61-2" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;diffEditor.ignoreTrimWhitespace&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
-<span id="cb61-3"><a href="workstation.html#cb61-3" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;diffEditor.maxComputationTime&quot;</span><span class="fu">:</span> <span class="dv">0</span><span class="fu">,</span></span>
-<span id="cb61-4"><a href="workstation.html#cb61-4" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;editor.acceptSuggestionOnEnter&quot;</span><span class="fu">:</span> <span class="st">&quot;off&quot;</span><span class="fu">,</span></span>
-<span id="cb61-5"><a href="workstation.html#cb61-5" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;editor.renderWhitespace&quot;</span><span class="fu">:</span> <span class="st">&quot;all&quot;</span><span class="fu">,</span></span>
-<span id="cb61-6"><a href="workstation.html#cb61-6" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;explorer.confirmDragAndDrop&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
-<span id="cb61-7"><a href="workstation.html#cb61-7" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.associations&quot;</span><span class="fu">:</span> <span class="fu">{</span></span>
-<span id="cb61-8"><a href="workstation.html#cb61-8" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;*.Rmd&quot;</span><span class="fu">:</span> <span class="st">&quot;markdown&quot;</span></span>
-<span id="cb61-9"><a href="workstation.html#cb61-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">},</span></span>
-<span id="cb61-10"><a href="workstation.html#cb61-10" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.trimFinalNewlines&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
-<span id="cb61-11"><a href="workstation.html#cb61-11" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.trimTrailingWhitespace&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
-<span id="cb61-12"><a href="workstation.html#cb61-12" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;git.autofetch&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
-<span id="cb61-13"><a href="workstation.html#cb61-13" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;git.confirmSync&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
-<span id="cb61-14"><a href="workstation.html#cb61-14" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;window.zoomLevel&quot;</span><span class="fu">:</span> <span class="dv">2</span><span class="fu">,</span></span>
-<span id="cb61-15"><a href="workstation.html#cb61-15" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb61-16"><a href="workstation.html#cb61-16" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;markdown.extension.orderedList.autoRenumber&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
-<span id="cb61-17"><a href="workstation.html#cb61-17" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;markdown.extension.orderedList.marker&quot;</span><span class="fu">:</span> <span class="st">&quot;one&quot;</span><span class="fu">,</span></span>
-<span id="cb61-18"><a href="workstation.html#cb61-18" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;markdownlint.config&quot;</span><span class="fu">:</span> <span class="fu">{</span></span>
-<span id="cb61-19"><a href="workstation.html#cb61-19" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD003&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;style&quot;</span><span class="fu">:</span> <span class="st">&quot;setext_with_atx&quot;</span> <span class="fu">},</span></span>
-<span id="cb61-20"><a href="workstation.html#cb61-20" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD007&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;indent&quot;</span><span class="fu">:</span> <span class="dv">2</span> <span class="fu">},</span></span>
-<span id="cb61-21"><a href="workstation.html#cb61-21" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD022&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;lines_above&quot;</span><span class="fu">:</span> <span class="dv">1</span><span class="fu">,</span></span>
-<span id="cb61-22"><a href="workstation.html#cb61-22" aria-hidden="true" tabindex="-1"></a>                  <span class="dt">&quot;lines_below&quot;</span><span class="fu">:</span> <span class="dv">1</span> <span class="fu">},</span></span>
-<span id="cb61-23"><a href="workstation.html#cb61-23" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD024&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;siblings_only&quot;</span><span class="fu">:</span> <span class="kw">true</span> <span class="fu">},</span></span>
-<span id="cb61-24"><a href="workstation.html#cb61-24" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;no-bare-urls&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
-<span id="cb61-25"><a href="workstation.html#cb61-25" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;no-inline-html&quot;</span><span class="fu">:</span> <span class="fu">{</span></span>
-<span id="cb61-26"><a href="workstation.html#cb61-26" aria-hidden="true" tabindex="-1"></a>        <span class="dt">&quot;allowed_elements&quot;</span><span class="fu">:</span> <span class="ot">[</span></span>
-<span id="cb61-27"><a href="workstation.html#cb61-27" aria-hidden="true" tabindex="-1"></a>          <span class="st">&quot;mermaid&quot;</span><span class="ot">,</span></span>
-<span id="cb61-28"><a href="workstation.html#cb61-28" aria-hidden="true" tabindex="-1"></a>          <span class="st">&quot;a&quot;</span><span class="ot">,</span></span>
-<span id="cb61-29"><a href="workstation.html#cb61-29" aria-hidden="true" tabindex="-1"></a>          <span class="st">&quot;img&quot;</span></span>
-<span id="cb61-30"><a href="workstation.html#cb61-30" aria-hidden="true" tabindex="-1"></a>        <span class="ot">]</span></span>
-<span id="cb61-31"><a href="workstation.html#cb61-31" aria-hidden="true" tabindex="-1"></a>      <span class="fu">}</span></span>
-<span id="cb61-32"><a href="workstation.html#cb61-32" aria-hidden="true" tabindex="-1"></a>  <span class="fu">}</span></span>
-<span id="cb61-33"><a href="workstation.html#cb61-33" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div>
+<div class="sourceCode" id="cb62"><pre class="sourceCode json"><code class="sourceCode json"><span id="cb62-1"><a href="workstation.html#cb62-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
+<span id="cb62-2"><a href="workstation.html#cb62-2" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;diffEditor.ignoreTrimWhitespace&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
+<span id="cb62-3"><a href="workstation.html#cb62-3" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;diffEditor.maxComputationTime&quot;</span><span class="fu">:</span> <span class="dv">0</span><span class="fu">,</span></span>
+<span id="cb62-4"><a href="workstation.html#cb62-4" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;editor.acceptSuggestionOnEnter&quot;</span><span class="fu">:</span> <span class="st">&quot;off&quot;</span><span class="fu">,</span></span>
+<span id="cb62-5"><a href="workstation.html#cb62-5" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;editor.renderWhitespace&quot;</span><span class="fu">:</span> <span class="st">&quot;all&quot;</span><span class="fu">,</span></span>
+<span id="cb62-6"><a href="workstation.html#cb62-6" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;explorer.confirmDragAndDrop&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
+<span id="cb62-7"><a href="workstation.html#cb62-7" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.associations&quot;</span><span class="fu">:</span> <span class="fu">{</span></span>
+<span id="cb62-8"><a href="workstation.html#cb62-8" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;*.Rmd&quot;</span><span class="fu">:</span> <span class="st">&quot;markdown&quot;</span></span>
+<span id="cb62-9"><a href="workstation.html#cb62-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">},</span></span>
+<span id="cb62-10"><a href="workstation.html#cb62-10" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.trimFinalNewlines&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
+<span id="cb62-11"><a href="workstation.html#cb62-11" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;files.trimTrailingWhitespace&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
+<span id="cb62-12"><a href="workstation.html#cb62-12" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;git.autofetch&quot;</span><span class="fu">:</span> <span class="kw">true</span><span class="fu">,</span></span>
+<span id="cb62-13"><a href="workstation.html#cb62-13" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;git.confirmSync&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
+<span id="cb62-14"><a href="workstation.html#cb62-14" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;window.zoomLevel&quot;</span><span class="fu">:</span> <span class="dv">2</span><span class="fu">,</span></span>
+<span id="cb62-15"><a href="workstation.html#cb62-15" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb62-16"><a href="workstation.html#cb62-16" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;markdown.extension.orderedList.autoRenumber&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
+<span id="cb62-17"><a href="workstation.html#cb62-17" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;markdown.extension.orderedList.marker&quot;</span><span class="fu">:</span> <span class="st">&quot;one&quot;</span><span class="fu">,</span></span>
+<span id="cb62-18"><a href="workstation.html#cb62-18" aria-hidden="true" tabindex="-1"></a>  <span class="dt">&quot;markdownlint.config&quot;</span><span class="fu">:</span> <span class="fu">{</span></span>
+<span id="cb62-19"><a href="workstation.html#cb62-19" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD003&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;style&quot;</span><span class="fu">:</span> <span class="st">&quot;setext_with_atx&quot;</span> <span class="fu">},</span></span>
+<span id="cb62-20"><a href="workstation.html#cb62-20" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD007&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;indent&quot;</span><span class="fu">:</span> <span class="dv">2</span> <span class="fu">},</span></span>
+<span id="cb62-21"><a href="workstation.html#cb62-21" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD022&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;lines_above&quot;</span><span class="fu">:</span> <span class="dv">1</span><span class="fu">,</span></span>
+<span id="cb62-22"><a href="workstation.html#cb62-22" aria-hidden="true" tabindex="-1"></a>                  <span class="dt">&quot;lines_below&quot;</span><span class="fu">:</span> <span class="dv">1</span> <span class="fu">},</span></span>
+<span id="cb62-23"><a href="workstation.html#cb62-23" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;MD024&quot;</span><span class="fu">:</span> <span class="fu">{</span> <span class="dt">&quot;siblings_only&quot;</span><span class="fu">:</span> <span class="kw">true</span> <span class="fu">},</span></span>
+<span id="cb62-24"><a href="workstation.html#cb62-24" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;no-bare-urls&quot;</span><span class="fu">:</span> <span class="kw">false</span><span class="fu">,</span></span>
+<span id="cb62-25"><a href="workstation.html#cb62-25" aria-hidden="true" tabindex="-1"></a>      <span class="dt">&quot;no-inline-html&quot;</span><span class="fu">:</span> <span class="fu">{</span></span>
+<span id="cb62-26"><a href="workstation.html#cb62-26" aria-hidden="true" tabindex="-1"></a>        <span class="dt">&quot;allowed_elements&quot;</span><span class="fu">:</span> <span class="ot">[</span></span>
+<span id="cb62-27"><a href="workstation.html#cb62-27" aria-hidden="true" tabindex="-1"></a>          <span class="st">&quot;mermaid&quot;</span><span class="ot">,</span></span>
+<span id="cb62-28"><a href="workstation.html#cb62-28" aria-hidden="true" tabindex="-1"></a>          <span class="st">&quot;a&quot;</span><span class="ot">,</span></span>
+<span id="cb62-29"><a href="workstation.html#cb62-29" aria-hidden="true" tabindex="-1"></a>          <span class="st">&quot;img&quot;</span></span>
+<span id="cb62-30"><a href="workstation.html#cb62-30" aria-hidden="true" tabindex="-1"></a>        <span class="ot">]</span></span>
+<span id="cb62-31"><a href="workstation.html#cb62-31" aria-hidden="true" tabindex="-1"></a>      <span class="fu">}</span></span>
+<span id="cb62-32"><a href="workstation.html#cb62-32" aria-hidden="true" tabindex="-1"></a>  <span class="fu">}</span></span>
+<span id="cb62-33"><a href="workstation.html#cb62-33" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div>
 <ol style="list-style-type: decimal">
 <li>Settings | Extensions | Markdown All in One | Ordered List | <strong>Auto Renumber: false</strong> {<code>"markdown.extension.orderedList.autoRenumber": false</code>}</li>
 <li>Settings | Extensions | Markdown All in One | Ordered List | <strong>Marker: one</strong> {<code>"markdown.extension.orderedList.marker": "one"</code>}</li>
@@ -702,7 +720,7 @@ <h3><span class="header-section-number">18.3.4</span> Python</h3>
 <p><a href="https://www.python.org/">Python</a> is used by some analysts. The prototypical installation involves two options.</p>
 <ul>
 <li><p><strong><a href="https://www.anaconda.com/distribution/#download-section">Anaconda</a></strong>, which includes Jupyter Notebooks, Jupyter Lab, and Spyder, plus two programs that are already on this list: RStudio and VS Code. In Windows, open “Anaconda Prompt” with administrative privileges and run:</p>
-<div class="sourceCode" id="cb62"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb62-1"><a href="workstation.html#cb62-1" aria-hidden="true" tabindex="-1"></a>conda install numpy pandas scikit<span class="op">-</span>learn matplotlib</span></code></pre></div></li>
+<div class="sourceCode" id="cb63"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb63-1"><a href="workstation.html#cb63-1" aria-hidden="true" tabindex="-1"></a>conda install numpy pandas scikit<span class="op">-</span>learn matplotlib</span></code></pre></div></li>
 <li><p><strong><a href="https://www.python.org/downloads/">Standard Python</a></strong>, while installing packages through pip3 in the terminal. If the <code>pip3</code> command is unrecognized because it’s missing from the OS <a href="https://en.wikipedia.org/wiki/PATH_(variable)">path</a> variable, an alternative is <code>py -3 -mpip install pysftp</code>; this calls pip through the <code>py</code> command which is sometimes in the path variable after installation.</p></li>
 </ul>
 </div>
@@ -769,98 +787,98 @@ <h2><span class="header-section-number">18.6</span> Installation Troubleshooting
 <h2><span class="header-section-number">18.7</span> Ubuntu Installation</h2>
 <p>Ubuntu desktop 19.04 follows <a href="https://askubuntu.com/a/862520/153921">these instructions</a> for the R and RStudio and required these debian packages to be installed before the R packages. The <code>--yes</code> option avoids manual confirmation for each line, so you can copy &amp; paste this into the terminal.</p>
 <p>Add the following to the sources with <code>sudo nano /etc/apt/sources.list</code>. The ‘eoan’ version may be updated; The ‘metrocast’ part could be modified too from <a href="https://launchpad.net/ubuntu/+archivemirrors">this list</a>. I found it worked better for a new Ubuntu release than ‘cloud.r-project.org.’</p>
-<div class="sourceCode" id="cb63"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb63-1"><a href="workstation.html#cb63-1" aria-hidden="true" tabindex="-1"></a><span class="co"># For R 4.0</span></span>
-<span id="cb63-2"><a href="workstation.html#cb63-2" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/</span>
-<span id="cb63-3"><a href="workstation.html#cb63-3" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> http://mirror.genesisadaptive.com/ubuntu/ focal-backports main restricted universe</span>
-<span id="cb63-4"><a href="workstation.html#cb63-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb63-5"><a href="workstation.html#cb63-5" aria-hidden="true" tabindex="-1"></a><span class="co"># For R 3.5 &amp; #.6</span></span>
-<span id="cb63-6"><a href="workstation.html#cb63-6" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> https://cloud.r-project/bin/linux/ubuntu/ eoan-cran35/</span>
-<span id="cb63-7"><a href="workstation.html#cb63-7" aria-hidden="true" tabindex="-1"></a><span class="ex">deb-src</span> https://cloud.r-project/bin/linux/ubuntu/ eoan-cran35/</span>
-<span id="cb63-8"><a href="workstation.html#cb63-8" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> http://mirror.metrocast.net/ubuntu/ eoan-backports main restricted universe</span></code></pre></div>
+<div class="sourceCode" id="cb64"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb64-1"><a href="workstation.html#cb64-1" aria-hidden="true" tabindex="-1"></a><span class="co"># For R 4.0</span></span>
+<span id="cb64-2"><a href="workstation.html#cb64-2" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/</span>
+<span id="cb64-3"><a href="workstation.html#cb64-3" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> http://mirror.genesisadaptive.com/ubuntu/ focal-backports main restricted universe</span>
+<span id="cb64-4"><a href="workstation.html#cb64-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb64-5"><a href="workstation.html#cb64-5" aria-hidden="true" tabindex="-1"></a><span class="co"># For R 3.5 &amp; 3.6</span></span>
+<span id="cb64-6"><a href="workstation.html#cb64-6" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> https://cloud.r-project.org/bin/linux/ubuntu/ eoan-cran35/</span>
+<span id="cb64-7"><a href="workstation.html#cb64-7" aria-hidden="true" tabindex="-1"></a><span class="ex">deb-src</span> https://cloud.r-project.org/bin/linux/ubuntu/ eoan-cran35/</span>
+<span id="cb64-8"><a href="workstation.html#cb64-8" aria-hidden="true" tabindex="-1"></a><span class="ex">deb</span> http://mirror.metrocast.net/ubuntu/ eoan-backports main restricted universe</span></code></pre></div>
 <p>This next block can be copied and pasted (ctrl-shift-v) into the console <a href="https://stackoverflow.com/a/43164204">entirely</a>. Or lines can be pasted individual (without the <code>( function install-packages {</code> line, or the last three lines).</p>
-<div class="sourceCode" id="cb64"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb64-1"><a href="workstation.html#cb64-1" aria-hidden="true" tabindex="-1"></a><span class="kw">(</span> <span class="kw">function</span><span class="fu"> install-packages</span> <span class="kw">{</span></span>
-<span id="cb64-2"><a href="workstation.html#cb64-2" aria-hidden="true" tabindex="-1"></a>  <span class="co">### Add the key, update the list, then install base R.</span></span>
-<span id="cb64-3"><a href="workstation.html#cb64-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-key adv <span class="at">--keyserver</span> keyserver.ubuntu.com <span class="at">--recv-keys</span> E298A3A825C0D65DFD57CBB651716619E084DAB9</span>
-<span id="cb64-4"><a href="workstation.html#cb64-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get update</span>
-<span id="cb64-5"><a href="workstation.html#cb64-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install r-base r-base-dev</span>
-<span id="cb64-6"><a href="workstation.html#cb64-6" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-7"><a href="workstation.html#cb64-7" aria-hidden="true" tabindex="-1"></a>  <span class="co">### Git</span></span>
-<span id="cb64-8"><a href="workstation.html#cb64-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install git-core</span>
-<span id="cb64-9"><a href="workstation.html#cb64-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">git</span> config <span class="at">--global</span> user.email <span class="st">&quot;wibeasley@hotmail.com&quot;</span></span>
-<span id="cb64-10"><a href="workstation.html#cb64-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">git</span> config <span class="at">--global</span> user.name <span class="st">&quot;Will Beasley&quot;</span></span>
-<span id="cb64-11"><a href="workstation.html#cb64-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">git</span> config <span class="at">--global</span> credential.helper <span class="st">&#39;cache --timeout=3600000&#39;</span></span>
-<span id="cb64-12"><a href="workstation.html#cb64-12" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-13"><a href="workstation.html#cb64-13" aria-hidden="true" tabindex="-1"></a>  <span class="co">### Ubuntu &amp; Bioconductor packages that are indirectly needed for packages and BBMC scripts</span></span>
-<span id="cb64-14"><a href="workstation.html#cb64-14" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-15"><a href="workstation.html#cb64-15" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Supports the `locate` command in bash</span></span>
-<span id="cb64-16"><a href="workstation.html#cb64-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install mlocate</span>
-<span id="cb64-17"><a href="workstation.html#cb64-17" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-18"><a href="workstation.html#cb64-18" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The genefilter package is needed for &#39;modeest&#39; on CRAN.</span></span>
-<span id="cb64-19"><a href="workstation.html#cb64-19" aria-hidden="true" tabindex="-1"></a>  <span class="co"># No longer a modeest dependency: Rscript -e &#39;BiocManager::install(&quot;genefilter&quot;)&#39;</span></span>
-<span id="cb64-20"><a href="workstation.html#cb64-20" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-21"><a href="workstation.html#cb64-21" aria-hidden="true" tabindex="-1"></a>  <span class="co">### CRAN packages that are also on the Ubuntu repositories</span></span>
-<span id="cb64-22"><a href="workstation.html#cb64-22" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-23"><a href="workstation.html#cb64-23" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;xml2&#39; package; https://CRAN.R-project.org/package=xml2</span></span>
-<span id="cb64-24"><a href="workstation.html#cb64-24" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libxml2-dev r-cran-xml</span>
-<span id="cb64-25"><a href="workstation.html#cb64-25" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-26"><a href="workstation.html#cb64-26" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;curl&#39; package, and others; https://CRAN.R-project.org/package=curl</span></span>
-<span id="cb64-27"><a href="workstation.html#cb64-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libssl-dev libcurl4-openssl-dev</span>
-<span id="cb64-28"><a href="workstation.html#cb64-28" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-29"><a href="workstation.html#cb64-29" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;udunits2&#39; package: https://cran.r-project.org/web/packages/udunits2/index.html</span></span>
-<span id="cb64-30"><a href="workstation.html#cb64-30" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libudunits2-dev</span>
-<span id="cb64-31"><a href="workstation.html#cb64-31" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-32"><a href="workstation.html#cb64-32" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;odbc&#39; package: https://github.com/r-dbi/odbc#linux---debian--ubuntu</span></span>
-<span id="cb64-33"><a href="workstation.html#cb64-33" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install unixodbc-dev tdsodbc odbc-postgresql libsqliteodbc</span>
-<span id="cb64-34"><a href="workstation.html#cb64-34" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-35"><a href="workstation.html#cb64-35" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;rgl&#39; package; https://stackoverflow.com/a/39952771/1082435</span></span>
-<span id="cb64-36"><a href="workstation.html#cb64-36" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libcgal-dev libglu1-mesa-dev</span>
-<span id="cb64-37"><a href="workstation.html#cb64-37" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-38"><a href="workstation.html#cb64-38" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;magick&#39; package; https://docs.ropensci.org/magick/articles/intro.html#build-from-source</span></span>
-<span id="cb64-39"><a href="workstation.html#cb64-39" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install <span class="st">&#39;libmagick++-dev&#39;</span></span>
-<span id="cb64-40"><a href="workstation.html#cb64-40" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-41"><a href="workstation.html#cb64-41" aria-hidden="true" tabindex="-1"></a>  <span class="co"># To compress vignettes when building a package; https://kalimu.github.io/post/checklist-for-r-package-submission-to-cran/</span></span>
-<span id="cb64-42"><a href="workstation.html#cb64-42" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install qpdf</span>
-<span id="cb64-43"><a href="workstation.html#cb64-43" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-44"><a href="workstation.html#cb64-44" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;pdftools&#39; and &#39;Rpoppler&#39; packages, which involve PDFs</span></span>
-<span id="cb64-45"><a href="workstation.html#cb64-45" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libpoppler-cpp-dev libpoppler-glib-dev</span>
-<span id="cb64-46"><a href="workstation.html#cb64-46" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-47"><a href="workstation.html#cb64-47" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;sys&#39; package</span></span>
-<span id="cb64-48"><a href="workstation.html#cb64-48" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libapparmor-dev</span>
-<span id="cb64-49"><a href="workstation.html#cb64-49" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-50"><a href="workstation.html#cb64-50" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;sf&#39; and other spatial packages: https://github.com/r-spatial/sf#ubuntu; https://github.com/r-spatial/sf/pull/1208</span></span>
-<span id="cb64-51"><a href="workstation.html#cb64-51" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libudunits2-dev libgdal-dev libgeos-dev libproj-dev libgeos++-dev</span>
-<span id="cb64-52"><a href="workstation.html#cb64-52" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-53"><a href="workstation.html#cb64-53" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For Cairo package, a dependency of Shiny &amp; plotly; https://gykovacsblog.wordpress.com/2017/05/15/installing-cairo-for-r-on-ubuntu-17-04/</span></span>
-<span id="cb64-54"><a href="workstation.html#cb64-54" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libcairo2-dev</span>
-<span id="cb64-55"><a href="workstation.html#cb64-55" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-56"><a href="workstation.html#cb64-56" aria-hidden="true" tabindex="-1"></a>  <span class="co"># &#39;rJava&#39; and others; https://www.r-bloggers.com/installing-rjava-on-ubuntu/</span></span>
-<span id="cb64-57"><a href="workstation.html#cb64-57" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install default-jre default-jdk</span>
-<span id="cb64-58"><a href="workstation.html#cb64-58" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> R CMD javareconf</span>
-<span id="cb64-59"><a href="workstation.html#cb64-59" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install r-cran-rjava</span>
-<span id="cb64-60"><a href="workstation.html#cb64-60" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-61"><a href="workstation.html#cb64-61" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For reprex and sometimes ssh keys; https://github.com/tidyverse/reprex#installation</span></span>
-<span id="cb64-62"><a href="workstation.html#cb64-62" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install xclip</span>
-<span id="cb64-63"><a href="workstation.html#cb64-63" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-64"><a href="workstation.html#cb64-64" aria-hidden="true" tabindex="-1"></a>  <span class="co"># gifski -apparently the rust compiler is necessary</span></span>
-<span id="cb64-65"><a href="workstation.html#cb64-65" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install cargo</span>
-<span id="cb64-66"><a href="workstation.html#cb64-66" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-67"><a href="workstation.html#cb64-67" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For databases</span></span>
-<span id="cb64-68"><a href="workstation.html#cb64-68" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install sqlite sqliteman</span>
-<span id="cb64-69"><a href="workstation.html#cb64-69" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install postgresql postgresql-contrib pgadmin3</span>
-<span id="cb64-70"><a href="workstation.html#cb64-70" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-71"><a href="workstation.html#cb64-71" aria-hidden="true" tabindex="-1"></a>  <span class="co"># pandoc</span></span>
-<span id="cb64-72"><a href="workstation.html#cb64-72" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install pandoc</span>
-<span id="cb64-73"><a href="workstation.html#cb64-73" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb64-74"><a href="workstation.html#cb64-74" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For checking packages. Avoid `/usr/bin/texi2dvi: not found` warning.</span></span>
-<span id="cb64-75"><a href="workstation.html#cb64-75" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install texinfo</span>
-<span id="cb64-76"><a href="workstation.html#cb64-76" aria-hidden="true" tabindex="-1"></a><span class="kw">}</span></span>
-<span id="cb64-77"><a href="workstation.html#cb64-77" aria-hidden="true" tabindex="-1"></a><span class="ex">install-packages</span></span>
-<span id="cb64-78"><a href="workstation.html#cb64-78" aria-hidden="true" tabindex="-1"></a><span class="er">)</span></span></code></pre></div>
+<div class="sourceCode" id="cb65"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb65-1"><a href="workstation.html#cb65-1" aria-hidden="true" tabindex="-1"></a><span class="kw">(</span> <span class="kw">function</span><span class="fu"> install-packages</span> <span class="kw">{</span></span>
+<span id="cb65-2"><a href="workstation.html#cb65-2" aria-hidden="true" tabindex="-1"></a>  <span class="co">### Add the key, update the list, then install base R.</span></span>
+<span id="cb65-3"><a href="workstation.html#cb65-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-key adv <span class="at">--keyserver</span> keyserver.ubuntu.com <span class="at">--recv-keys</span> E298A3A825C0D65DFD57CBB651716619E084DAB9</span>
+<span id="cb65-4"><a href="workstation.html#cb65-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get update</span>
+<span id="cb65-5"><a href="workstation.html#cb65-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install r-base r-base-dev</span>
+<span id="cb65-6"><a href="workstation.html#cb65-6" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-7"><a href="workstation.html#cb65-7" aria-hidden="true" tabindex="-1"></a>  <span class="co">### Git</span></span>
+<span id="cb65-8"><a href="workstation.html#cb65-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install git-core</span>
+<span id="cb65-9"><a href="workstation.html#cb65-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">git</span> config <span class="at">--global</span> user.email <span class="st">&quot;wibeasley@hotmail.com&quot;</span></span>
+<span id="cb65-10"><a href="workstation.html#cb65-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">git</span> config <span class="at">--global</span> user.name <span class="st">&quot;Will Beasley&quot;</span></span>
+<span id="cb65-11"><a href="workstation.html#cb65-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">git</span> config <span class="at">--global</span> credential.helper <span class="st">&#39;cache --timeout=3600000&#39;</span></span>
+<span id="cb65-12"><a href="workstation.html#cb65-12" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-13"><a href="workstation.html#cb65-13" aria-hidden="true" tabindex="-1"></a>  <span class="co">### Ubuntu &amp; Bioconductor packages that are indirectly needed for packages and BBMC scripts</span></span>
+<span id="cb65-14"><a href="workstation.html#cb65-14" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-15"><a href="workstation.html#cb65-15" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Supports the `locate` command in bash</span></span>
+<span id="cb65-16"><a href="workstation.html#cb65-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install mlocate</span>
+<span id="cb65-17"><a href="workstation.html#cb65-17" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-18"><a href="workstation.html#cb65-18" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The genefilter package is needed for &#39;modeest&#39; on CRAN.</span></span>
+<span id="cb65-19"><a href="workstation.html#cb65-19" aria-hidden="true" tabindex="-1"></a>  <span class="co"># No longer a modeest dependency: Rscript -e &#39;BiocManager::install(&quot;genefilter&quot;)&#39;</span></span>
+<span id="cb65-20"><a href="workstation.html#cb65-20" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-21"><a href="workstation.html#cb65-21" aria-hidden="true" tabindex="-1"></a>  <span class="co">### CRAN packages that are also on the Ubuntu repositories</span></span>
+<span id="cb65-22"><a href="workstation.html#cb65-22" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-23"><a href="workstation.html#cb65-23" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;xml2&#39; package; https://CRAN.R-project.org/package=xml2</span></span>
+<span id="cb65-24"><a href="workstation.html#cb65-24" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libxml2-dev r-cran-xml</span>
+<span id="cb65-25"><a href="workstation.html#cb65-25" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-26"><a href="workstation.html#cb65-26" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;curl&#39; package, and others; https://CRAN.R-project.org/package=curl</span></span>
+<span id="cb65-27"><a href="workstation.html#cb65-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libssl-dev libcurl4-openssl-dev</span>
+<span id="cb65-28"><a href="workstation.html#cb65-28" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-29"><a href="workstation.html#cb65-29" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;udunits2&#39; package: https://cran.r-project.org/web/packages/udunits2/index.html</span></span>
+<span id="cb65-30"><a href="workstation.html#cb65-30" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libudunits2-dev</span>
+<span id="cb65-31"><a href="workstation.html#cb65-31" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-32"><a href="workstation.html#cb65-32" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;odbc&#39; package: https://github.com/r-dbi/odbc#linux---debian--ubuntu</span></span>
+<span id="cb65-33"><a href="workstation.html#cb65-33" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install unixodbc-dev tdsodbc odbc-postgresql libsqliteodbc</span>
+<span id="cb65-34"><a href="workstation.html#cb65-34" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-35"><a href="workstation.html#cb65-35" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;rgl&#39; package; https://stackoverflow.com/a/39952771/1082435</span></span>
+<span id="cb65-36"><a href="workstation.html#cb65-36" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libcgal-dev libglu1-mesa-dev</span>
+<span id="cb65-37"><a href="workstation.html#cb65-37" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-38"><a href="workstation.html#cb65-38" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;magick&#39; package; https://docs.ropensci.org/magick/articles/intro.html#build-from-source</span></span>
+<span id="cb65-39"><a href="workstation.html#cb65-39" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install <span class="st">&#39;libmagick++-dev&#39;</span></span>
+<span id="cb65-40"><a href="workstation.html#cb65-40" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-41"><a href="workstation.html#cb65-41" aria-hidden="true" tabindex="-1"></a>  <span class="co"># To compress vignettes when building a package; https://kalimu.github.io/post/checklist-for-r-package-submission-to-cran/</span></span>
+<span id="cb65-42"><a href="workstation.html#cb65-42" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install qpdf</span>
+<span id="cb65-43"><a href="workstation.html#cb65-43" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-44"><a href="workstation.html#cb65-44" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;pdftools&#39; and &#39;Rpoppler&#39; packages, which involve PDFs</span></span>
+<span id="cb65-45"><a href="workstation.html#cb65-45" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libpoppler-cpp-dev libpoppler-glib-dev</span>
+<span id="cb65-46"><a href="workstation.html#cb65-46" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-47"><a href="workstation.html#cb65-47" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;sys&#39; package</span></span>
+<span id="cb65-48"><a href="workstation.html#cb65-48" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libapparmor-dev</span>
+<span id="cb65-49"><a href="workstation.html#cb65-49" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-50"><a href="workstation.html#cb65-50" aria-hidden="true" tabindex="-1"></a>  <span class="co"># The &#39;sf&#39; and other spatial packages: https://github.com/r-spatial/sf#ubuntu; https://github.com/r-spatial/sf/pull/1208</span></span>
+<span id="cb65-51"><a href="workstation.html#cb65-51" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libudunits2-dev libgdal-dev libgeos-dev libproj-dev libgeos++-dev</span>
+<span id="cb65-52"><a href="workstation.html#cb65-52" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-53"><a href="workstation.html#cb65-53" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For Cairo package, a dependency of Shiny &amp; plotly; https://gykovacsblog.wordpress.com/2017/05/15/installing-cairo-for-r-on-ubuntu-17-04/</span></span>
+<span id="cb65-54"><a href="workstation.html#cb65-54" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install libcairo2-dev</span>
+<span id="cb65-55"><a href="workstation.html#cb65-55" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-56"><a href="workstation.html#cb65-56" aria-hidden="true" tabindex="-1"></a>  <span class="co"># &#39;rJava&#39; and others; https://www.r-bloggers.com/installing-rjava-on-ubuntu/</span></span>
+<span id="cb65-57"><a href="workstation.html#cb65-57" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install default-jre default-jdk</span>
+<span id="cb65-58"><a href="workstation.html#cb65-58" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> R CMD javareconf</span>
+<span id="cb65-59"><a href="workstation.html#cb65-59" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install r-cran-rjava</span>
+<span id="cb65-60"><a href="workstation.html#cb65-60" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-61"><a href="workstation.html#cb65-61" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For reprex and sometimes ssh keys; https://github.com/tidyverse/reprex#installation</span></span>
+<span id="cb65-62"><a href="workstation.html#cb65-62" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install xclip</span>
+<span id="cb65-63"><a href="workstation.html#cb65-63" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-64"><a href="workstation.html#cb65-64" aria-hidden="true" tabindex="-1"></a>  <span class="co"># gifski - apparently the rust compiler is necessary</span></span>
+<span id="cb65-65"><a href="workstation.html#cb65-65" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install cargo</span>
+<span id="cb65-66"><a href="workstation.html#cb65-66" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-67"><a href="workstation.html#cb65-67" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For databases</span></span>
+<span id="cb65-68"><a href="workstation.html#cb65-68" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install sqlite sqliteman</span>
+<span id="cb65-69"><a href="workstation.html#cb65-69" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install postgresql postgresql-contrib pgadmin3</span>
+<span id="cb65-70"><a href="workstation.html#cb65-70" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-71"><a href="workstation.html#cb65-71" aria-hidden="true" tabindex="-1"></a>  <span class="co"># pandoc</span></span>
+<span id="cb65-72"><a href="workstation.html#cb65-72" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get <span class="at">--yes</span> install pandoc</span>
+<span id="cb65-73"><a href="workstation.html#cb65-73" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb65-74"><a href="workstation.html#cb65-74" aria-hidden="true" tabindex="-1"></a>  <span class="co"># For checking packages. Avoid `/usr/bin/texi2dvi: not found` warning.</span></span>
+<span id="cb65-75"><a href="workstation.html#cb65-75" aria-hidden="true" tabindex="-1"></a>  <span class="fu">sudo</span> apt-get install texinfo</span>
+<span id="cb65-76"><a href="workstation.html#cb65-76" aria-hidden="true" tabindex="-1"></a><span class="kw">}</span></span>
+<span id="cb65-77"><a href="workstation.html#cb65-77" aria-hidden="true" tabindex="-1"></a><span class="ex">install-packages</span></span>
+<span id="cb65-78"><a href="workstation.html#cb65-78" aria-hidden="true" tabindex="-1"></a><span class="er">)</span></span></code></pre></div>
 <p>The version of pandoc from the Ubuntu repository may be delayed. To install the latest version, <a href="https://github.com/jgm/pandoc/releases">download the .deb file</a> then install from the same directory. Finally, verify the version.</p>
-<div class="sourceCode" id="cb65"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb65-1"><a href="workstation.html#cb65-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sudo</span> dpkg <span class="at">-i</span> pandoc-<span class="pp">*</span></span>
-<span id="cb65-2"><a href="workstation.html#cb65-2" aria-hidden="true" tabindex="-1"></a><span class="ex">pandoc</span> <span class="at">-v</span></span></code></pre></div>
+<div class="sourceCode" id="cb66"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb66-1"><a href="workstation.html#cb66-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sudo</span> dpkg <span class="at">-i</span> pandoc-<span class="pp">*</span></span>
+<span id="cb66-2"><a href="workstation.html#cb66-2" aria-hidden="true" tabindex="-1"></a><span class="ex">pandoc</span> <span class="at">-v</span></span></code></pre></div>
 <p>The Postman native app for Ubuntu is <a href="https://learning.getpostman.com/docs/postman/launching-postman/installation-and-updates/#installing-postman-on-linux">installed</a> through <a href="https://tutorials.ubuntu.com/tutorial/basic-snap-usage">snap</a>, which is <a href="https://tutorials.ubuntu.com/tutorial/basic-snap-usage#2">updated daily automatically</a>.</p>
-<div class="sourceCode" id="cb66"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb66-1"><a href="workstation.html#cb66-1" aria-hidden="true" tabindex="-1"></a><span class="ex">snap</span> install postman</span></code></pre></div>
+<div class="sourceCode" id="cb67"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb67-1"><a href="workstation.html#cb67-1" aria-hidden="true" tabindex="-1"></a><span class="ex">snap</span> install postman</span></code></pre></div>
 </div>
 <div id="workstation-retired" class="section level2" number="18.8">
 <h2><span class="header-section-number">18.8</span> Retired Tools</h2>
@@ -897,7 +915,7 @@ <h2><span class="header-section-number">18.8</span> Retired Tools</h2>
 <li><a href="https://atom.io/packages/git-plus">git-plus</a>: Do git things without the terminal (I don’t think this is necessary anymore).</li>
 </ol>
 <p>The packages can be installed through Atom, or through the <code>apm</code> utility in the command line:</p>
-<div class="sourceCode" id="cb67"><pre class="sourceCode bash"><code class="sourceCode bash"><span id="cb67-1"><a href="workstation.html#cb67-1" aria-hidden="true" tabindex="-1"></a><span class="fu">apm</span> install sublime-style-column-selection atom-language-r language-csv atom-beautify atom-wrap-in-tag minimap script</span></code></pre></div>
+<div class="sourceCode" id="cb68"><pre class="sourceCode bash"><code class="sourceCode bash"><span id="cb68-1"><a href="workstation.html#cb68-1" aria-hidden="true" tabindex="-1"></a><span class="fu">apm</span> install sublime-style-column-selection atom-language-r language-csv atom-beautify atom-wrap-in-tag minimap script</span></code></pre></div>
 <p>And the following settings keep files consistent among developers.</p>
 <ol style="list-style-type: decimal">
 <li>File | Settings | Editor | Tab Length: 2 (As opposed to 3 or 4, used in other conventions)</li>