From 586c05d265ef4c51b974a7c356027b347a6167cf Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Thu, 20 Jul 2017 16:32:58 -0700
Subject: [PATCH 01/13] initial structure for retrieve_mapping

---
 r-pkg/R/elasticsearch_eda_funs.R | 73 ++++++++++++++++++++++++++++++++
 r-pkg/R/elasticsearch_parsers.R  |  1 -
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 5632648..6692513 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -114,3 +114,76 @@ get_counts <- function(field
     
 }
 
+#' @title Retrieve the mapping definitions for an index or index/type in Elasticsearch
+#' @name retrieve_mapping
+#' @description For one or multiple index or index/type, return a data table with
+#'              field names and types.
+#' @importFrom httr GET, content
+#' @importFrom futile.logger flog.fatal
+#' @param es_host A string identifying an Elasticsearch host. This should be of
+#'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
+#'                \code{'http://myindex.thing.com:9200'}.
+#' @param es_index A character vector that contains the names of indices for
+#'                 which to get mappings. Default, is \code{'_all'}, which means
+#'                 get the mapping for all indices.
+#' @param es_type A character vector that contains the names of types for which
+#'                to get mappings. Default is \code{NULL}, which means get the
+#'                mapping for all types in the chosen indices.
+#' @param es_field A character vector that contains the names of fields for which
+#'                 to get mappings, which can be used when the entire mapping is
+#'                 not desired. Default is \code{NULL}, which means get the
+#'                 mapping for all fields in the chosen types.
+#' @export
+#' @return A data table containing the field - definition mapping for the selected
+#'         indices, types, and fields
+retrieve_mapping <- function(es_host
+                             , es_index = '_all'
+                             , es_type = NULL
+                             , es_field = NULL
+) {
+    
+    # Input checking
+    url <- .ValidateAndFormatHost(es_host)
+    
+    # collapse character vectors into comma separated strings. If any arguments
+    # are NULL, create an empty string
+    indices <- paste(es_index, collapse = ',')
+    types <- paste(es_type, collapse = ',')
+    fields <- paste(es_field, collapse = ',')
+    
+    # build the query
+    if (nchar(indices) > 0) {
+        url <- paste(url, indices, '_mapping', sep = '/')
+    } else {
+        msg <- paste("retrive_mapping must be passed a valid es_index."
+                     , "You provided", paste(es_index, collapse = ', ')
+                     , 'which resulted in an empty string')
+        futile.logger::flog.fatal(msg)
+        stop(msg)
+    }
+    
+    if (nchar(types) > 0) {
+        url <- paste(url, types, sep = '/')
+    }
+    
+    if (nchar(fields) > 0) {
+        url <- paste(url, 'field', fields, sep = '/')
+    }
+    
+    # make the query
+    result <- httr::GET(url = url)
+    resultContent <- httr::content(result)
+    
+    # parse the result into a data table
+    
+}
+
+
+
+
+
+
+
+
+
+
diff --git a/r-pkg/R/elasticsearch_parsers.R b/r-pkg/R/elasticsearch_parsers.R
index 29fcf7b..0211ef8 100644
--- a/r-pkg/R/elasticsearch_parsers.R
+++ b/r-pkg/R/elasticsearch_parsers.R
@@ -1038,7 +1038,6 @@ es_search <- function(es_host
     
 }
 
-
 # [title] Execute a Search request against an Elasticsearch cluster
 # [name] .search_request
 # [description] Given a query string (JSON with valid DSL), execute a request

From 49dd58674630e32622df8809923346341ae17820 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Fri, 21 Jul 2017 01:27:40 -0700
Subject: [PATCH 02/13] completed initial version of retrieve_mapping.

---
 r-pkg/NAMESPACE                               |  4 +
 r-pkg/R/elasticsearch_eda_funs.R              | 86 ++++++++++---------
 r-pkg/R/uptasticsearch.R                      |  2 +
 r-pkg/man/retrieve_mapping.Rd                 | 35 ++++++++
 .../testthat/test-repo_characteristics.R      |  2 +-
 5 files changed, 88 insertions(+), 41 deletions(-)
 create mode 100644 r-pkg/man/retrieve_mapping.Rd

diff --git a/r-pkg/NAMESPACE b/r-pkg/NAMESPACE
index ff99430..a952324 100644
--- a/r-pkg/NAMESPACE
+++ b/r-pkg/NAMESPACE
@@ -5,6 +5,7 @@ export(chomp_hits)
 export(es_search)
 export(get_counts)
 export(parse_date_time)
+export(retrieve_mapping)
 export(unpack_nested_data)
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)
@@ -17,6 +18,7 @@ importFrom(data.table,setnames)
 importFrom(futile.logger,flog.fatal)
 importFrom(futile.logger,flog.info)
 importFrom(futile.logger,flog.warn)
+importFrom(httr,GET)
 importFrom(httr,POST)
 importFrom(httr,content)
 importFrom(httr,stop_for_status)
@@ -30,4 +32,6 @@ importFrom(purrr,map2)
 importFrom(purrr,simplify)
 importFrom(purrr,transpose)
 importFrom(stringr,str_extract)
+importFrom(stringr,str_replace)
+importFrom(stringr,str_split_fixed)
 importFrom(uuid,UUIDgenerate)
diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 6692513..837333e 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -118,28 +118,29 @@ get_counts <- function(field
 #' @name retrieve_mapping
 #' @description For one or multiple index or index/type, return a data table with
 #'              field names and types.
-#' @importFrom httr GET, content
+#' @importFrom data.table := data.table setnames
 #' @importFrom futile.logger flog.fatal
+#' @importFrom httr GET content stop_for_status
+#' @importFrom stringr str_split_fixed str_replace
 #' @param es_host A string identifying an Elasticsearch host. This should be of
 #'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
 #'                \code{'http://myindex.thing.com:9200'}.
-#' @param es_index A character vector that contains the names of indices for
-#'                 which to get mappings. Default, is \code{'_all'}, which means
-#'                 get the mapping for all indices.
-#' @param es_type A character vector that contains the names of types for which
-#'                to get mappings. Default is \code{NULL}, which means get the
-#'                mapping for all types in the chosen indices.
-#' @param es_field A character vector that contains the names of fields for which
-#'                 to get mappings, which can be used when the entire mapping is
-#'                 not desired. Default is \code{NULL}, which means get the
-#'                 mapping for all fields in the chosen types.
+#' @param es_indexes A character vector that contains the names of indexes for
+#'                   which to get mappings. Default, is \code{'_all'}, which means
+#'                   get the mapping for all indexes
+#' @param es_types A character vector that contains the names of types for which
+#'                 to get mappings. Default is \code{NULL}, which means get the
+#'                 mapping for all types in the chosen indexes
 #' @export
-#' @return A data table containing the field - definition mapping for the selected
-#'         indices, types, and fields
+#' @return A data.table containing four columns: index, type, field, and datatype
+#' @examples \dontrun{
+#' # get the mapping for all types in the ticket_sales index
+#' mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
+#'                               , es_indexes = "ticket_sales")
+#' }
 retrieve_mapping <- function(es_host
-                             , es_index = '_all'
-                             , es_type = NULL
-                             , es_field = NULL
+                             , es_indexes = '_all'
+                             , es_types = NULL
 ) {
     
     # Input checking
@@ -147,43 +148,48 @@ retrieve_mapping <- function(es_host
     
     # collapse character vectors into comma separated strings. If any arguments
     # are NULL, create an empty string
-    indices <- paste(es_index, collapse = ',')
-    types <- paste(es_type, collapse = ',')
-    fields <- paste(es_field, collapse = ',')
+    indexes <- paste(es_indexes, collapse = ',')
+    types <- paste(es_types, collapse = ',')
     
-    # build the query
-    if (nchar(indices) > 0) {
-        url <- paste(url, indices, '_mapping', sep = '/')
+    ########################## build the query ################################
+    if (nchar(indexes) > 0) {
+        url <- paste(url, indexes, '_mapping', sep = '/')
     } else {
-        msg <- paste("retrive_mapping must be passed a valid es_index."
-                     , "You provided", paste(es_index, collapse = ', ')
+        msg <- paste("retrive_mapping must be passed a valid es_indexes."
+                     , "You provided", paste(es_indexes, collapse = ', ')
                      , 'which resulted in an empty string')
         futile.logger::flog.fatal(msg)
         stop(msg)
     }
     
+    # check if the user specified any types
     if (nchar(types) > 0) {
         url <- paste(url, types, sep = '/')
     }
     
-    if (nchar(fields) > 0) {
-        url <- paste(url, 'field', fields, sep = '/')
-    }
-    
-    # make the query
+    ########################## make the query ################################
     result <- httr::GET(url = url)
+    httr::stop_for_status(result)
     resultContent <- httr::content(result)
     
-    # parse the result into a data table
+    ######################### parse the result ###############################
+    # flatten the list object that is returned from the query
+    flattened <- unlist(resultContent)
+    
+    # the names of the flattened object has the index, type, and field name
+    # however, it also has extra terms that we can use to split the name
+    # into three distinct parts
+    mappingCols <- stringr::str_split_fixed(names(flattened), '\\.(mappings|properties)\\.', n = 3)
+    mappingDT <- data.table::as.data.table(mappingCols)
+    data.table::setnames(mappingDT, c('index', 'type', 'field'))
     
+    # if the field is a nested object or has multiple indexes, the field name
+    # have extra terms that we can remove
+    removeRegEx <- '\\.(properties|fields|type)'
+    mappingDT[, field := stringr::str_replace_all(field, removeRegEx, '')]
+    
+    # add the actual data type as a new column in the data table
+    mappingDT[, datatype := as.character(flattened)]
+    
+    return(mappingDT)
 }
-
-
-
-
-
-
-
-
-
-
diff --git a/r-pkg/R/uptasticsearch.R b/r-pkg/R/uptasticsearch.R
index d8277f0..b80f231 100644
--- a/r-pkg/R/uptasticsearch.R
+++ b/r-pkg/R/uptasticsearch.R
@@ -8,4 +8,6 @@
 utils::globalVariables(c('.'
                          , '.I'
                          , '.id'
+                         , 'field'
+                         , 'datatype'
                        ))
\ No newline at end of file
diff --git a/r-pkg/man/retrieve_mapping.Rd b/r-pkg/man/retrieve_mapping.Rd
new file mode 100644
index 0000000..7a26b21
--- /dev/null
+++ b/r-pkg/man/retrieve_mapping.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/elasticsearch_eda_funs.R
+\name{retrieve_mapping}
+\alias{retrieve_mapping}
+\title{Retrieve the mapping definitions for an index or index/type in Elasticsearch}
+\usage{
+retrieve_mapping(es_host, es_indexes = "_all", es_types = NULL)
+}
+\arguments{
+\item{es_host}{A string identifying an Elasticsearch host. This should be of
+the form \code{[transfer_protocol][hostname]:[port]}. For example,
+\code{'http://myindex.thing.com:9200'}.}
+
+\item{es_indexes}{A character vector that contains the names of indexes for
+which to get mappings. Default, is \code{'_all'}, which means
+get the mapping for all indexes}
+
+\item{es_types}{A character vector that contains the names of types for which
+to get mappings. Default is \code{NULL}, which means get the
+mapping for all types in the chosen indexes}
+}
+\value{
+A data.table containing four columns: index, type, field, and datatype
+}
+\description{
+For one or multiple index or index/type, return a data table with
+             field names and types.
+}
+\examples{
+\dontrun{
+# get the mapping for all types in the ticket_sales index
+mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
+                              , es_indexes = "ticket_sales")
+}
+}
diff --git a/r-pkg/tests/testthat/test-repo_characteristics.R b/r-pkg/tests/testthat/test-repo_characteristics.R
index 6b916bd..631ee23 100644
--- a/r-pkg/tests/testthat/test-repo_characteristics.R
+++ b/r-pkg/tests/testthat/test-repo_characteristics.R
@@ -35,7 +35,7 @@ test_that('R CMD check should not return any unexpected errors, warnings, or not
     testthat::skip_on_cran()
     
     # Check the package
-    x <- devtools::check(pkg = '../../../uptasticsearch'
+    x <- devtools::check(pkg = '../../../r-pkg'
                          , document = TRUE
                          , args = '--no-tests --ignore-vignettes'
                          , quiet = FALSE)

From 0cb15d90d22e8b56cd7ca8a4441a7df88baf0a0c Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Fri, 21 Jul 2017 11:25:42 -0700
Subject: [PATCH 03/13] renamed retrieve_mapping to get_fields, dropped
 es_types, clarified documentation, and removed meta rows from mappingDT

---
 r-pkg/NAMESPACE                               |  3 +-
 r-pkg/R/elasticsearch_eda_funs.R              | 54 +++++++++----------
 r-pkg/R/uptasticsearch.R                      |  1 -
 .../{retrieve_mapping.Rd => get_fields.Rd}    | 22 ++++----
 4 files changed, 35 insertions(+), 45 deletions(-)
 rename r-pkg/man/{retrieve_mapping.Rd => get_fields.Rd} (50%)

diff --git a/r-pkg/NAMESPACE b/r-pkg/NAMESPACE
index a952324..6f57094 100644
--- a/r-pkg/NAMESPACE
+++ b/r-pkg/NAMESPACE
@@ -4,8 +4,8 @@ export(chomp_aggs)
 export(chomp_hits)
 export(es_search)
 export(get_counts)
+export(get_fields)
 export(parse_date_time)
-export(retrieve_mapping)
 export(unpack_nested_data)
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)
@@ -31,6 +31,7 @@ importFrom(parallel,stopCluster)
 importFrom(purrr,map2)
 importFrom(purrr,simplify)
 importFrom(purrr,transpose)
+importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_split_fixed)
diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 837333e..75ab8c2 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -114,42 +114,38 @@ get_counts <- function(field
     
 }
 
-#' @title Retrieve the mapping definitions for an index or index/type in Elasticsearch
-#' @name retrieve_mapping
-#' @description For one or multiple index or index/type, return a data table with
-#'              field names and types.
-#' @importFrom data.table := data.table setnames
+#' @title Get the names and data types of the indexed fields in an index
+#' @name get_fields
+#' @description For a given Elasticsearch index, return the mapping from field name
+#'              to data type for all indexed fields.
+#' @importFrom data.table := as.data.table setnames
 #' @importFrom futile.logger flog.fatal
 #' @importFrom httr GET content stop_for_status
-#' @importFrom stringr str_split_fixed str_replace
+#' @importFrom stringr str_detect str_split_fixed str_replace
 #' @param es_host A string identifying an Elasticsearch host. This should be of
 #'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
 #'                \code{'http://myindex.thing.com:9200'}.
 #' @param es_indexes A character vector that contains the names of indexes for
 #'                   which to get mappings. Default, is \code{'_all'}, which means
-#'                   get the mapping for all indexes
-#' @param es_types A character vector that contains the names of types for which
-#'                 to get mappings. Default is \code{NULL}, which means get the
-#'                 mapping for all types in the chosen indexes
+#'                   get the mapping for all indexes. Names of indexes can be
+#'                   treated as regular expressions.
 #' @export
 #' @return A data.table containing four columns: index, type, field, and datatype
 #' @examples \dontrun{
-#' # get the mapping for all types in the ticket_sales index
+#' # get the mapping for all types in the ticket_sales and customers indexes
 #' mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
-#'                               , es_indexes = "ticket_sales")
+#'                               , es_indexes = c("ticket_sales", "indexes"))
 #' }
-retrieve_mapping <- function(es_host
-                             , es_indexes = '_all'
-                             , es_types = NULL
+get_fields <- function(es_host
+                       , es_indexes = '_all'
 ) {
     
     # Input checking
-    url <- .ValidateAndFormatHost(es_host)
+    url <- uptasticsearch:::.ValidateAndFormatHost(es_host)
     
     # collapse character vectors into comma separated strings. If any arguments
     # are NULL, create an empty string
     indexes <- paste(es_indexes, collapse = ',')
-    types <- paste(es_types, collapse = ',')
     
     ########################## build the query ################################
     if (nchar(indexes) > 0) {
@@ -162,11 +158,6 @@ retrieve_mapping <- function(es_host
         stop(msg)
     }
     
-    # check if the user specified any types
-    if (nchar(types) > 0) {
-        url <- paste(url, types, sep = '/')
-    }
-    
     ########################## make the query ################################
     result <- httr::GET(url = url)
     httr::stop_for_status(result)
@@ -180,16 +171,19 @@ retrieve_mapping <- function(es_host
     # however, it also has extra terms that we can use to split the name
     # into three distinct parts
     mappingCols <- stringr::str_split_fixed(names(flattened), '\\.(mappings|properties)\\.', n = 3)
-    mappingDT <- data.table::as.data.table(mappingCols)
-    data.table::setnames(mappingDT, c('index', 'type', 'field'))
     
-    # if the field is a nested object or has multiple indexes, the field name
-    # have extra terms that we can remove
-    removeRegEx <- '\\.(properties|fields|type)'
-    mappingDT[, field := stringr::str_replace_all(field, removeRegEx, '')]
+    # convert to data table and add the data type column
+    mappingDT <- data.table::data.table(mappingCols, as.character(flattened))
+    data.table::setnames(mappingDT, c('index', 'type', 'field', 'datatype'))
+    
+    # remove any rows, where the field does not end in ".type" to remove meta info
+    mappingDT <- mappingDT[stringr::str_detect(field, '\\.type')]
     
-    # add the actual data type as a new column in the data table
-    mappingDT[, datatype := as.character(flattened)]
+    # mappings in nested objects have sub-fields called properties
+    # mappings of fields that are indexed in different ways have multiple fields
+    # we want to remove these terms from the field name
+    metaRegEx <- '\\.(properties|fields|type)'
+    mappingDT[, field := stringr::str_replace_all(field, metaRegEx, '')]
     
     return(mappingDT)
 }
diff --git a/r-pkg/R/uptasticsearch.R b/r-pkg/R/uptasticsearch.R
index b80f231..196d28b 100644
--- a/r-pkg/R/uptasticsearch.R
+++ b/r-pkg/R/uptasticsearch.R
@@ -9,5 +9,4 @@ utils::globalVariables(c('.'
                          , '.I'
                          , '.id'
                          , 'field'
-                         , 'datatype'
                        ))
\ No newline at end of file
diff --git a/r-pkg/man/retrieve_mapping.Rd b/r-pkg/man/get_fields.Rd
similarity index 50%
rename from r-pkg/man/retrieve_mapping.Rd
rename to r-pkg/man/get_fields.Rd
index 7a26b21..e5ead2a 100644
--- a/r-pkg/man/retrieve_mapping.Rd
+++ b/r-pkg/man/get_fields.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/elasticsearch_eda_funs.R
-\name{retrieve_mapping}
-\alias{retrieve_mapping}
-\title{Retrieve the mapping definitions for an index or index/type in Elasticsearch}
+\name{get_fields}
+\alias{get_fields}
+\title{Get the names and data types of the indexed fields in an index}
 \usage{
-retrieve_mapping(es_host, es_indexes = "_all", es_types = NULL)
+get_fields(es_host, es_indexes = "_all")
 }
 \arguments{
 \item{es_host}{A string identifying an Elasticsearch host. This should be of
@@ -13,23 +13,19 @@ the form \code{[transfer_protocol][hostname]:[port]}. For example,
 
 \item{es_indexes}{A character vector that contains the names of indexes for
 which to get mappings. Default, is \code{'_all'}, which means
-get the mapping for all indexes}
-
-\item{es_types}{A character vector that contains the names of types for which
-to get mappings. Default is \code{NULL}, which means get the
-mapping for all types in the chosen indexes}
+get the mapping for all indexes.}
 }
 \value{
 A data.table containing four columns: index, type, field, and datatype
 }
 \description{
-For one or multiple index or index/type, return a data table with
-             field names and types.
+For a given Elasticsearch index, return the mapping from field name
+             to data type for all indexed fields.
 }
 \examples{
 \dontrun{
-# get the mapping for all types in the ticket_sales index
+# get the mapping for all types in the ticket_sales and customers indexes
 mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
-                              , es_indexes = "ticket_sales")
+                              , es_indexes = c("ticket_sales", "indexes"))
 }
 }

From c699c3a2242e86bce7ba0144466ded843fb09fd1 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Fri, 21 Jul 2017 14:11:05 -0700
Subject: [PATCH 04/13] fixed package namespacing and roxygen errors

---
 r-pkg/R/elasticsearch_eda_funs.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 75ab8c2..257276e 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -118,10 +118,10 @@ get_counts <- function(field
 #' @name get_fields
 #' @description For a given Elasticsearch index, return the mapping from field name
 #'              to data type for all indexed fields.
-#' @importFrom data.table := as.data.table setnames
+#' @importFrom data.table := data.table setnames
 #' @importFrom futile.logger flog.fatal
 #' @importFrom httr GET content stop_for_status
-#' @importFrom stringr str_detect str_split_fixed str_replace
+#' @importFrom stringr str_detect str_split_fixed str_replace_all
 #' @param es_host A string identifying an Elasticsearch host. This should be of
 #'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
 #'                \code{'http://myindex.thing.com:9200'}.
@@ -141,7 +141,7 @@ get_fields <- function(es_host
 ) {
     
     # Input checking
-    url <- uptasticsearch:::.ValidateAndFormatHost(es_host)
+    url <- .ValidateAndFormatHost(es_host)
     
     # collapse character vectors into comma separated strings. If any arguments
     # are NULL, create an empty string
@@ -151,7 +151,7 @@ get_fields <- function(es_host
     if (nchar(indexes) > 0) {
         url <- paste(url, indexes, '_mapping', sep = '/')
     } else {
-        msg <- paste("retrive_mapping must be passed a valid es_indexes."
+        msg <- paste("get_fields must be passed a valid es_indexes."
                      , "You provided", paste(es_indexes, collapse = ', ')
                      , 'which resulted in an empty string')
         futile.logger::flog.fatal(msg)

From 4e02a7ce6874900d802ad06b446a6ec9a2f053d3 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Fri, 21 Jul 2017 14:19:01 -0700
Subject: [PATCH 05/13] synced NAMESPACE and get_fields.Rd with roxygen tags

---
 r-pkg/NAMESPACE         | 2 +-
 r-pkg/man/get_fields.Rd | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/r-pkg/NAMESPACE b/r-pkg/NAMESPACE
index 6f57094..2a339a3 100644
--- a/r-pkg/NAMESPACE
+++ b/r-pkg/NAMESPACE
@@ -33,6 +33,6 @@ importFrom(purrr,simplify)
 importFrom(purrr,transpose)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
-importFrom(stringr,str_replace)
+importFrom(stringr,str_replace_all)
 importFrom(stringr,str_split_fixed)
 importFrom(uuid,UUIDgenerate)
diff --git a/r-pkg/man/get_fields.Rd b/r-pkg/man/get_fields.Rd
index e5ead2a..66ae3ef 100644
--- a/r-pkg/man/get_fields.Rd
+++ b/r-pkg/man/get_fields.Rd
@@ -13,7 +13,8 @@ the form \code{[transfer_protocol][hostname]:[port]}. For example,
 
 \item{es_indexes}{A character vector that contains the names of indexes for
 which to get mappings. Default, is \code{'_all'}, which means
-get the mapping for all indexes.}
+get the mapping for all indexes. Names of indexes can be
+treated as regular expressions.}
 }
 \value{
 A data.table containing four columns: index, type, field, and datatype

From 84f847491559d4f7bef3719fdf1487aeee3a9761 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Sat, 22 Jul 2017 18:51:02 -0700
Subject: [PATCH 06/13] addressed comments on get_fields and added unit tests

---
 r-pkg/DESCRIPTION                             |  1 +
 r-pkg/NAMESPACE                               |  1 +
 r-pkg/R/elasticsearch_eda_funs.R              | 35 ++++++++---
 r-pkg/R/uptasticsearch.R                      |  1 +
 r-pkg/inst/testdata/one_index_mapping.json    | 29 +++++++++
 r-pkg/inst/testdata/two_index_mapping.json    | 52 +++++++++++++++
 r-pkg/man/get_fields.Rd                       |  4 +-
 .../testthat/test-elasticsearch_eda_funs.R    | 63 +++++++++++++++++++
 8 files changed, 175 insertions(+), 11 deletions(-)
 create mode 100644 r-pkg/inst/testdata/one_index_mapping.json
 create mode 100644 r-pkg/inst/testdata/two_index_mapping.json
 create mode 100644 r-pkg/tests/testthat/test-elasticsearch_eda_funs.R

diff --git a/r-pkg/DESCRIPTION b/r-pkg/DESCRIPTION
index 38965dd..6675d48 100644
--- a/r-pkg/DESCRIPTION
+++ b/r-pkg/DESCRIPTION
@@ -6,6 +6,7 @@ Authors@R: c(
     person("James", "Lamb", email = "james.lamb@uptake.com", role = c("aut", "cre")),
     person("Nick", "Paras", email = "nick.paras@uptake.com", role = c("aut")),
     person("Austin", "Dickey", email = "austin.dickey@uptake.com", role = c("aut")),
+    person("Michael", "Frasco", email = "mfrasco6@gmail.com", role = c("ctb")),
     person("Uptake Technologies Inc.", role = c("cph")))
 Maintainer: James Lamb <james.lamb@uptake.com>
 Description:
diff --git a/r-pkg/NAMESPACE b/r-pkg/NAMESPACE
index 2a339a3..4fb4dba 100644
--- a/r-pkg/NAMESPACE
+++ b/r-pkg/NAMESPACE
@@ -15,6 +15,7 @@ importFrom(data.table,rbindlist)
 importFrom(data.table,setcolorder)
 importFrom(data.table,setkeyv)
 importFrom(data.table,setnames)
+importFrom(data.table,uniqueN)
 importFrom(futile.logger,flog.fatal)
 importFrom(futile.logger,flog.info)
 importFrom(futile.logger,flog.warn)
diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 257276e..23cd17b 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -118,10 +118,8 @@ get_counts <- function(field
 #' @name get_fields
 #' @description For a given Elasticsearch index, return the mapping from field name
 #'              to data type for all indexed fields.
-#' @importFrom data.table := data.table setnames
-#' @importFrom futile.logger flog.fatal
+#' @importFrom futile.logger flog.fatal flog.info
 #' @importFrom httr GET content stop_for_status
-#' @importFrom stringr str_detect str_split_fixed str_replace_all
 #' @param es_host A string identifying an Elasticsearch host. This should be of
 #'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
 #'                \code{'http://myindex.thing.com:9200'}.
@@ -130,9 +128,9 @@ get_counts <- function(field
 #'                   get the mapping for all indexes. Names of indexes can be
 #'                   treated as regular expressions.
 #' @export
-#' @return A data.table containing four columns: index, type, field, and datatype
+#' @return A data.table containing four columns: index, type, field, and data_type
 #' @examples \dontrun{
-#' # get the mapping for all types in the ticket_sales and customers indexes
+#' # get the mapping for all indexed fields in the ticket_sales and customers indexes
 #' mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
 #'                               , es_indexes = c("ticket_sales", "indexes"))
 #' }
@@ -159,13 +157,26 @@ get_fields <- function(es_host
     }
     
     ########################## make the query ################################
+    futile.logger::flog.info(paste('Getting indexed fields for indexes:', indexes))
+    
     result <- httr::GET(url = url)
     httr::stop_for_status(result)
     resultContent <- httr::content(result)
     
+    ##################### return the flattened result #########################
+    return(.flatten_mapping(mapping = resultContent))
+}
+
+# [title] Flatten a mapping list of field name to data type into a data table
+# [mapping] A list of json that is returned from a request to the mappings API
+#' @importFrom data.table := data.table setnames uniqueN
+#' @importFrom futile.logger flog.info
+#' @importFrom stringr str_detect str_split_fixed str_replace_all
+.flatten_mapping <- function(mapping) {
+    
     ######################### parse the result ###############################
     # flatten the list object that is returned from the query
-    flattened <- unlist(resultContent)
+    flattened <- unlist(mapping)
     
     # the names of the flattened object has the index, type, and field name
     # however, it also has extra terms that we can use to split the name
@@ -173,11 +184,12 @@ get_fields <- function(es_host
     mappingCols <- stringr::str_split_fixed(names(flattened), '\\.(mappings|properties)\\.', n = 3)
     
     # convert to data table and add the data type column
-    mappingDT <- data.table::data.table(mappingCols, as.character(flattened))
-    data.table::setnames(mappingDT, c('index', 'type', 'field', 'datatype'))
+    mappingDT <- data.table::data.table(meta = mappingCols, data_type = as.character(flattened))
+    newColNames <- c('index', 'type', 'field', 'data_type')
+    data.table::setnames(mappingDT, old = names(mappingDT), new = newColNames)
     
     # remove any rows, where the field does not end in ".type" to remove meta info
-    mappingDT <- mappingDT[stringr::str_detect(field, '\\.type')]
+    mappingDT <- mappingDT[stringr::str_detect(field, '\\.type$')]
     
     # mappings in nested objects have sub-fields called properties
     # mappings of fields that are indexed in different ways have multiple fields
@@ -185,5 +197,10 @@ get_fields <- function(es_host
     metaRegEx <- '\\.(properties|fields|type)'
     mappingDT[, field := stringr::str_replace_all(field, metaRegEx, '')]
     
+    # log some information about this request to the user
+    numFields <- nrow(mappingDT)
+    numIndex <- mappingDT[, data.table::uniqueN(index)]
+    futile.logger::flog.info(paste('Retrieved', numFields, 'fields across', numIndex, 'indexes'))
+    
     return(mappingDT)
 }
diff --git a/r-pkg/R/uptasticsearch.R b/r-pkg/R/uptasticsearch.R
index 196d28b..b3497dd 100644
--- a/r-pkg/R/uptasticsearch.R
+++ b/r-pkg/R/uptasticsearch.R
@@ -9,4 +9,5 @@ utils::globalVariables(c('.'
                          , '.I'
                          , '.id'
                          , 'field'
+                         , 'index'
                        ))
\ No newline at end of file
diff --git a/r-pkg/inst/testdata/one_index_mapping.json b/r-pkg/inst/testdata/one_index_mapping.json
new file mode 100644
index 0000000..d1c2c68
--- /dev/null
+++ b/r-pkg/inst/testdata/one_index_mapping.json
@@ -0,0 +1,29 @@
+{
+    "basketball": {
+        "mappings": {
+            "players": {
+                "properties": {
+                    "team": {
+                        "type": "keyword"
+                    },
+                    "name": {
+                        "properties": {
+                            "first": {
+                                "type": "text"
+                            },
+                            "last": {
+                                "type": "text"
+                            }
+                        }
+                    },
+                    "age": {
+                        "type": "integer"
+                    },
+                    "position": {
+                        "type": "keyword"
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/r-pkg/inst/testdata/two_index_mapping.json b/r-pkg/inst/testdata/two_index_mapping.json
new file mode 100644
index 0000000..e9f85f1
--- /dev/null
+++ b/r-pkg/inst/testdata/two_index_mapping.json
@@ -0,0 +1,52 @@
+{
+  "company": {
+    "mappings": {
+      "building": {
+        "properties": {
+          "id": {
+            "type": "long"
+          },
+          "address": {
+            "type": "text",
+            "fields": {
+              "keyword": {
+                "type": "keyword",
+                "ignore_above": 256
+              }
+            }
+          }
+        }
+      }
+    }
+  },
+  "hotel": {
+    "mappings": {
+      "bed_room": {
+        "properties": {
+          "num_beds": {
+            "type": "integer"
+          },
+          "description": {
+            "type": "text"
+          }
+        }
+      },
+      "conference_room": {
+        "properties": {
+          "num_people": {
+            "type": "integer"
+          },
+          "purpose": {
+            "type": "text",
+            "fields": {
+              "keyword": {
+                "type": "keyword",
+                "ignore_above": 256
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/r-pkg/man/get_fields.Rd b/r-pkg/man/get_fields.Rd
index 66ae3ef..4b8b40c 100644
--- a/r-pkg/man/get_fields.Rd
+++ b/r-pkg/man/get_fields.Rd
@@ -17,7 +17,7 @@ get the mapping for all indexes. Names of indexes can be
 treated as regular expressions.}
 }
 \value{
-A data.table containing four columns: index, type, field, and datatype
+A data.table containing four columns: index, type, field, and data_type
 }
 \description{
 For a given Elasticsearch index, return the mapping from field name
@@ -25,7 +25,7 @@ For a given Elasticsearch index, return the mapping from field name
 }
 \examples{
 \dontrun{
-# get the mapping for all types in the ticket_sales and customers indexes
+# get the mapping for all indexed fields in the ticket_sales and customers indexes
 mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
                               , es_indexes = c("ticket_sales", "indexes"))
 }
diff --git a/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
new file mode 100644
index 0000000..91abdce
--- /dev/null
+++ b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
@@ -0,0 +1,63 @@
+context("Elasticsearch eda functions")
+
+# Configure logger (suppress all logs in testing)
+loggerOptions <- futile.logger::logger.options()
+if (!identical(loggerOptions, list())){
+    origLogThreshold <- loggerOptions[[1]][['threshold']]    
+} else {
+    origLogThreshold <- futile.logger::INFO
+}
+futile.logger::flog.threshold(0)
+
+#--- 1. get_counts
+
+
+
+#--- 2. get_fields
+
+    # Gives an informative error if es_indexes is NULL or an empty string
+    test_that("get_fields should give an informative error if es_indexes is NULL or an empty string",
+              {
+                  expect_error(get_fields(es_host = "http://es.custdb.mycompany.com:9200"
+                                          , es_indexes = NULL),
+                               regexp = "get_fields must be passed a valid es_indexes")
+                  expect_error(get_fields(es_host = "http://es.custdb.mycompany.com:9200"
+                                          , es_indexes = ''),
+                               regexp = "get_fields must be passed a valid es_indexes")
+              }
+    )
+    
+    # Works if one index is passed
+    test_that("get_fields should work if the mapping for one index is provided",
+              {
+                  test_json <- system.file("testdata", "one_index_mapping.json", package = "uptasticsearch")
+                  mapping <- jsonlite::fromJSON(txt = test_json)
+                  mappingDT <- uptasticsearch:::.flatten_mapping(mapping = mapping)
+                  expected <- data.table::data.table(
+                      index = rep('basketball', 5)
+                      , type = rep('players', 5)
+                      , field = c('team', 'name.first', 'name.last', 'age', 'position')
+                      , data_type = c('keyword', 'text', 'text', 'integer', 'keyword')
+                  )
+                  expect_identical(mappingDT, expected)
+              }
+    )
+    
+    # works if multiple indexes are passed
+    test_that("get_fields should work if the mapping for multiple indexes are provided",
+              {
+                  test_json <- system.file("testdata", "two_index_mapping.json", package = "uptasticsearch")
+                  mapping <- jsonlite::fromJSON(txt = test_json)
+                  mappingDT <- uptasticsearch:::.flatten_mapping(mapping = mapping)
+                  expected <- data.table::data.table(
+                      index = c(rep('company', 3), rep('hotel', 5))
+                      , type = c(rep('building', 3), rep('bed_room', 2), rep('conference_room', 3))
+                      , field = c('id', 'address', 'address.keyword', 'num_beds', 'description'
+                                  , 'num_people', 'purpose', 'purpose.keyword')
+                      , data_type = c('long', 'text', 'keyword', 'integer', 'text', 'integer'
+                                      , 'text', 'keyword')
+                  )
+                  expect_identical(mappingDT, expected)
+              }
+    )
+    
\ No newline at end of file

From a00cd2dd8a61578d3893f9e73761e20b57592438 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Sun, 23 Jul 2017 20:33:35 -0700
Subject: [PATCH 07/13] added support for aliases

---
 r-pkg/NAMESPACE                               |  2 +
 r-pkg/R/elasticsearch_eda_funs.R              | 76 ++++++++++++++++---
 r-pkg/man/get_fields.Rd                       |  6 +-
 .../testthat/test-elasticsearch_eda_funs.R    | 29 ++++++-
 4 files changed, 100 insertions(+), 13 deletions(-)

diff --git a/r-pkg/NAMESPACE b/r-pkg/NAMESPACE
index 4fb4dba..b741b4a 100644
--- a/r-pkg/NAMESPACE
+++ b/r-pkg/NAMESPACE
@@ -34,6 +34,8 @@ importFrom(purrr,simplify)
 importFrom(purrr,transpose)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
+importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
+importFrom(stringr,str_split)
 importFrom(stringr,str_split_fixed)
 importFrom(uuid,UUIDgenerate)
diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 23cd17b..77f95d8 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -120,6 +120,7 @@ get_counts <- function(field
 #'              to data type for all indexed fields.
 #' @importFrom futile.logger flog.fatal flog.info
 #' @importFrom httr GET content stop_for_status
+#' @importFrom data.table := uniqueN
 #' @param es_host A string identifying an Elasticsearch host. This should be of
 #'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
 #'                \code{'http://myindex.thing.com:9200'}.
@@ -127,6 +128,9 @@ get_counts <- function(field
 #'                   which to get mappings. Default, is \code{'_all'}, which means
 #'                   get the mapping for all indexes. Names of indexes can be
 #'                   treated as regular expressions.
+#' @param use_alias A boolean flag that controls whether the true Elasticsearch
+#'                  index name or the aliased name for an index is returned.
+#'                  Default is \code{TRUE}.
 #' @export
 #' @return A data.table containing four columns: index, type, field, and data_type
 #' @examples \dontrun{
@@ -136,6 +140,7 @@ get_counts <- function(field
 #' }
 get_fields <- function(es_host
                        , es_indexes = '_all'
+                       , use_alias = TRUE
 ) {
     
     # Input checking
@@ -163,14 +168,30 @@ get_fields <- function(es_host
     httr::stop_for_status(result)
     resultContent <- httr::content(result)
     
-    ##################### return the flattened result #########################
-    return(.flatten_mapping(mapping = resultContent))
+    ######################### flatten the result ##############################
+    mappingDT <- .flatten_mapping(mapping = resultContent)
+    
+    ##################### get aliases for index names #########################
+    if (use_alias) {
+        aliasDT <- .get_aliases(es_host = es_host)
+        if (!is.null(aliasDT)) {
+            lookup <- aliasDT[['alias']]
+            names(lookup) <- aliasDT[['index']]
+            mappingDT[index %in% names(lookup), index := lookup[index]]
+        }
+    }
+    
+    # log some information about this request to the user
+    numFields <- nrow(mappingDT)
+    numIndex <- mappingDT[, data.table::uniqueN(index)]
+    futile.logger::flog.info(paste('Retrieved', numFields, 'fields across', numIndex, 'indexes'))
+    
+    return(mappingDT)
 }
 
 # [title] Flatten a mapping list of field name to data type into a data table
 # [mapping] A list of json that is returned from a request to the mappings API
-#' @importFrom data.table := data.table setnames uniqueN
-#' @importFrom futile.logger flog.info
+#' @importFrom data.table := data.table setnames
 #' @importFrom stringr str_detect str_split_fixed str_replace_all
 .flatten_mapping <- function(mapping) {
     
@@ -197,10 +218,47 @@ get_fields <- function(es_host
     metaRegEx <- '\\.(properties|fields|type)'
     mappingDT[, field := stringr::str_replace_all(field, metaRegEx, '')]
     
-    # log some information about this request to the user
-    numFields <- nrow(mappingDT)
-    numIndex <- mappingDT[, data.table::uniqueN(index)]
-    futile.logger::flog.info(paste('Retrieved', numFields, 'fields across', numIndex, 'indexes'))
-    
     return(mappingDT)
 }
+
+# [title] Get a data.table containing names of indexes and aliases
+# [es_host] A string identifying an Elasticsearch host.
+#' @importFrom httr content GET stop_for_status
+.get_aliases <- function(es_host) {
+    
+    # construct the url to the alias endpoint
+    url <- paste0(es_host, '/_cat/aliases')
+    
+    # make the request
+    result <- httr::GET(url = url)
+    httr::stop_for_status(result)
+    resultContent <- httr::content(result)
+    
+    if (is.null(resultContent)) {
+        # there are no aliases in this Elasticsearch cluster
+        return(NULL)
+    } else {
+        return(.process_alias(alias_string = resultContent))
+    }
+}
+
+# [title] Process the string returned by the GET alias API into a data table
+# [alias_string] A string returned by the alias API with index and alias name
+#' @importFrom stringr str_replace str_split
+#' @importFrom data.table as.data.table setnames
+.process_alias <- function(alias_string) {
+    # remove the new line at the end of the string, if it exists
+    aliasString <- stringr::str_replace(alias_string, '\n$', '')
+    
+    # split each entry, separated by a new line, into a vector in a list
+    aliases <- stringr::str_split(aliasString, '\n')[[1]]
+    
+    # remove white space and only take the first two entries
+    aliases <- stringr::str_split(aliases, '\\s+')
+    aliases <- lapply(aliases, function(pair) pair[1:2])
+    
+    # create a data table from the resulting list
+    aliasDT <- data.table::as.data.table(matrix(unlist(aliases), byrow = TRUE, ncol = 2))
+    data.table::setnames(aliasDT, old = colnames(aliasDT), new = c('alias', 'index'))
+    return(aliasDT)
+}
diff --git a/r-pkg/man/get_fields.Rd b/r-pkg/man/get_fields.Rd
index 4b8b40c..cb36899 100644
--- a/r-pkg/man/get_fields.Rd
+++ b/r-pkg/man/get_fields.Rd
@@ -4,7 +4,7 @@
 \alias{get_fields}
 \title{Get the names and data types of the indexed fields in an index}
 \usage{
-get_fields(es_host, es_indexes = "_all")
+get_fields(es_host, es_indexes = "_all", use_alias = TRUE)
 }
 \arguments{
 \item{es_host}{A string identifying an Elasticsearch host. This should be of
@@ -15,6 +15,10 @@ the form \code{[transfer_protocol][hostname]:[port]}. For example,
 which to get mappings. Default, is \code{'_all'}, which means
 get the mapping for all indexes. Names of indexes can be
 treated as regular expressions.}
+
+\item{use_alias}{A boolean flag that controls whether the true Elasticsearch
+index name or the aliased name for an index is returned.
+Default is \code{TRUE}.}
 }
 \value{
 A data.table containing four columns: index, type, field, and data_type
diff --git a/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
index 91abdce..dd8fdef 100644
--- a/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
+++ b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
@@ -27,8 +27,10 @@ futile.logger::flog.threshold(0)
               }
     )
     
+#--- 3. .flatten_mapping
+    
     # Works if one index is passed
-    test_that("get_fields should work if the mapping for one index is provided",
+    test_that(".flatten_mapping should work if the mapping for one index is provided",
               {
                   test_json <- system.file("testdata", "one_index_mapping.json", package = "uptasticsearch")
                   mapping <- jsonlite::fromJSON(txt = test_json)
@@ -44,7 +46,7 @@ futile.logger::flog.threshold(0)
     )
     
     # works if multiple indexes are passed
-    test_that("get_fields should work if the mapping for multiple indexes are provided",
+    test_that(".flatten_mapping should work if the mapping for multiple indexes are provided",
               {
                   test_json <- system.file("testdata", "two_index_mapping.json", package = "uptasticsearch")
                   mapping <- jsonlite::fromJSON(txt = test_json)
@@ -60,4 +62,25 @@ futile.logger::flog.threshold(0)
                   expect_identical(mappingDT, expected)
               }
     )
-    
\ No newline at end of file
+
+#--- 4. .process_alias
+    
+    # works if one alias is passed
+    test_that(".process_alias works if one alias is included",
+              {
+                  alias_string <- 'dwm shakespeare - - -\n'
+                  aliasDT <- uptasticsearch:::.process_alias(alias_string = alias_string)
+                  expected <- data.table::data.table(alias = 'dwm', index = 'shakespeare')
+                  expect_identical(aliasDT, expected)
+              }
+    )
+    
+    # works if multiple aliases are passed
+    test_that(".process_alias works if one alias is included",
+              {
+                  alias_string <- 'dwm   shakespeare - - -\nmoney bank        - - -\n'
+                  aliasDT <- uptasticsearch:::.process_alias(alias_string = alias_string)
+                  expected <- data.table::data.table(alias = c('dwm', 'money'), index = c('shakespeare', 'bank'))
+                  expect_identical(aliasDT, expected)
+              }
+    )

From 38b2d16e110825050c9d500439a47c5294c4617b Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Sun, 23 Jul 2017 22:39:00 -0700
Subject: [PATCH 08/13] removed use_alias argument

---
 r-pkg/R/elasticsearch_eda_funs.R | 20 +++++++-------------
 r-pkg/man/get_fields.Rd          |  6 +-----
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 77f95d8..ed05aa8 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -128,9 +128,6 @@ get_counts <- function(field
 #'                   which to get mappings. Default, is \code{'_all'}, which means
 #'                   get the mapping for all indexes. Names of indexes can be
 #'                   treated as regular expressions.
-#' @param use_alias A boolean flag that controls whether the true Elasticsearch
-#'                  index name or the aliased name for an index is returned.
-#'                  Default is \code{TRUE}.
 #' @export
 #' @return A data.table containing four columns: index, type, field, and data_type
 #' @examples \dontrun{
@@ -140,7 +137,6 @@ get_counts <- function(field
 #' }
 get_fields <- function(es_host
                        , es_indexes = '_all'
-                       , use_alias = TRUE
 ) {
     
     # Input checking
@@ -166,19 +162,17 @@ get_fields <- function(es_host
     
     result <- httr::GET(url = url)
     httr::stop_for_status(result)
-    resultContent <- httr::content(result)
+    resultContent <- httr::content(result, as = 'parsed')
     
     ######################### flatten the result ##############################
     mappingDT <- .flatten_mapping(mapping = resultContent)
     
     ##################### get aliases for index names #########################
-    if (use_alias) {
-        aliasDT <- .get_aliases(es_host = es_host)
-        if (!is.null(aliasDT)) {
-            lookup <- aliasDT[['alias']]
-            names(lookup) <- aliasDT[['index']]
-            mappingDT[index %in% names(lookup), index := lookup[index]]
-        }
+    aliasDT <- .get_aliases(es_host = es_host)
+    if (!is.null(aliasDT)) {
+        lookup <- aliasDT[['alias']]
+        names(lookup) <- aliasDT[['index']]
+        mappingDT[index %in% names(lookup), index := lookup[index]]
     }
     
     # log some information about this request to the user
@@ -232,7 +226,7 @@ get_fields <- function(es_host
     # make the request
     result <- httr::GET(url = url)
     httr::stop_for_status(result)
-    resultContent <- httr::content(result)
+    resultContent <- httr::content(result, as = 'text')
     
     if (is.null(resultContent)) {
         # there are no aliases in this Elasticsearch cluster
diff --git a/r-pkg/man/get_fields.Rd b/r-pkg/man/get_fields.Rd
index cb36899..4b8b40c 100644
--- a/r-pkg/man/get_fields.Rd
+++ b/r-pkg/man/get_fields.Rd
@@ -4,7 +4,7 @@
 \alias{get_fields}
 \title{Get the names and data types of the indexed fields in an index}
 \usage{
-get_fields(es_host, es_indexes = "_all", use_alias = TRUE)
+get_fields(es_host, es_indexes = "_all")
 }
 \arguments{
 \item{es_host}{A string identifying an Elasticsearch host. This should be of
@@ -15,10 +15,6 @@ the form \code{[transfer_protocol][hostname]:[port]}. For example,
 which to get mappings. Default, is \code{'_all'}, which means
 get the mapping for all indexes. Names of indexes can be
 treated as regular expressions.}
-
-\item{use_alias}{A boolean flag that controls whether the true Elasticsearch
-index name or the aliased name for an index is returned.
-Default is \code{TRUE}.}
 }
 \value{
 A data.table containing four columns: index, type, field, and data_type

From 91b4c56d9ac186431ad875396d2fca49ba8007f0 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Tue, 25 Jul 2017 11:54:13 -0700
Subject: [PATCH 09/13] added mocked test for get_fields

---
 .../testthat/test-elasticsearch_eda_funs.R    | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
index dd8fdef..74b2958 100644
--- a/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
+++ b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
@@ -27,6 +27,36 @@ futile.logger::flog.threshold(0)
               }
     )
     
+    # works as expected when mocked
+    test_that('get_fields works as expected when mocked',
+              {
+                  test_json <- system.file("testdata", "two_index_mapping.json", package = "uptasticsearch")
+                  aliasDT <- data.table::data.table(alias = c('alias1', 'alias2')
+                                                    , index = c('company', 'otherIndex'))
+                  testthat::with_mock(
+                      `httr::stop_for_status` = function(...) {return(NULL)},
+                      `httr::GET` = function(...) {return(NULL)},
+                      `httr::content` = function(...) {return(jsonlite::fromJSON(txt = test_json))},
+                      `uptasticsearch::.get_aliases` = function(...) {return(aliasDT)},
+                      {
+                          outDT <- get_fields(es_host = 'http://db.mycompany.com:9200'
+                                              , es_indexes = c('company', 'hotel'))
+                          data.table::setkey(outDT, NULL)
+                          expected <- data.table::data.table(
+                              index = c(rep('alias1', 3), rep('hotel', 5))
+                              , type = c(rep('building', 3), rep('bed_room', 2), rep('conference_room', 3))
+                              , field = c('id', 'address', 'address.keyword', 'num_beds', 'description'
+                                          , 'num_people', 'purpose', 'purpose.keyword')
+                              , data_type = c('long', 'text', 'keyword', 'integer', 'text', 'integer'
+                                              , 'text', 'keyword')
+                          )
+                          expect_identical(outDT, expected)
+                      }
+                  )
+              }
+    )
+    
+    
 #--- 3. .flatten_mapping
     
     # Works if one index is passed

From d25b6c646b17803d3ef6619fdad28b9889e654e0 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Wed, 26 Jul 2017 19:38:41 -0700
Subject: [PATCH 10/13] changed indexes to indices

---
 r-pkg/R/elasticsearch_eda_funs.R              | 26 +++++++++----------
 r-pkg/man/get_fields.Rd                       | 10 +++----
 .../testthat/test-elasticsearch_eda_funs.R    | 18 ++++++-------
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index ed05aa8..00061c0 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -124,19 +124,19 @@ get_counts <- function(field
 #' @param es_host A string identifying an Elasticsearch host. This should be of
 #'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
 #'                \code{'http://myindex.thing.com:9200'}.
-#' @param es_indexes A character vector that contains the names of indexes for
+#' @param es_indices A character vector that contains the names of indices for
 #'                   which to get mappings. Default, is \code{'_all'}, which means
-#'                   get the mapping for all indexes. Names of indexes can be
+#'                   get the mapping for all indices Names of indices can be
 #'                   treated as regular expressions.
 #' @export
 #' @return A data.table containing four columns: index, type, field, and data_type
 #' @examples \dontrun{
-#' # get the mapping for all indexed fields in the ticket_sales and customers indexes
+#' # get the mapping for all indexed fields in the ticket_sales and customers indices
 #' mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
-#'                               , es_indexes = c("ticket_sales", "indexes"))
+#'                               , es_indices = c("ticket_sales", "customers"))
 #' }
 get_fields <- function(es_host
-                       , es_indexes = '_all'
+                       , es_indices = '_all'
 ) {
     
     # Input checking
@@ -144,21 +144,21 @@ get_fields <- function(es_host
     
     # collapse character vectors into comma separated strings. If any arguments
     # are NULL, create an empty string
-    indexes <- paste(es_indexes, collapse = ',')
+    indices <- paste(es_indices, collapse = ',')
     
     ########################## build the query ################################
-    if (nchar(indexes) > 0) {
-        url <- paste(url, indexes, '_mapping', sep = '/')
+    if (nchar(indices) > 0) {
+        url <- paste(url, indices, '_mapping', sep = '/')
     } else {
-        msg <- paste("get_fields must be passed a valid es_indexes."
-                     , "You provided", paste(es_indexes, collapse = ', ')
+        msg <- paste("get_fields must be passed a valid es_indices."
+                     , "You provided", paste(es_indices, collapse = ', ')
                      , 'which resulted in an empty string')
         futile.logger::flog.fatal(msg)
         stop(msg)
     }
     
     ########################## make the query ################################
-    futile.logger::flog.info(paste('Getting indexed fields for indexes:', indexes))
+    futile.logger::flog.info(paste('Getting indexed fields for indices:', indices))
     
     result <- httr::GET(url = url)
     httr::stop_for_status(result)
@@ -178,7 +178,7 @@ get_fields <- function(es_host
     # log some information about this request to the user
     numFields <- nrow(mappingDT)
     numIndex <- mappingDT[, data.table::uniqueN(index)]
-    futile.logger::flog.info(paste('Retrieved', numFields, 'fields across', numIndex, 'indexes'))
+    futile.logger::flog.info(paste('Retrieved', numFields, 'fields across', numIndex, 'indices'))
     
     return(mappingDT)
 }
@@ -215,7 +215,7 @@ get_fields <- function(es_host
     return(mappingDT)
 }
 
-# [title] Get a data.table containing names of indexes and aliases
+# [title] Get a data.table containing names of indices and aliases
 # [es_host] A string identifying an Elasticsearch host.
 #' @importFrom httr content GET stop_for_status
 .get_aliases <- function(es_host) {
diff --git a/r-pkg/man/get_fields.Rd b/r-pkg/man/get_fields.Rd
index 4b8b40c..3d32dd1 100644
--- a/r-pkg/man/get_fields.Rd
+++ b/r-pkg/man/get_fields.Rd
@@ -4,16 +4,16 @@
 \alias{get_fields}
 \title{Get the names and data types of the indexed fields in an index}
 \usage{
-get_fields(es_host, es_indexes = "_all")
+get_fields(es_host, es_indices = "_all")
 }
 \arguments{
 \item{es_host}{A string identifying an Elasticsearch host. This should be of
 the form \code{[transfer_protocol][hostname]:[port]}. For example,
 \code{'http://myindex.thing.com:9200'}.}
 
-\item{es_indexes}{A character vector that contains the names of indexes for
+\item{es_indices}{A character vector that contains the names of indices for
 which to get mappings. Default, is \code{'_all'}, which means
-get the mapping for all indexes. Names of indexes can be
+get the mapping for all indices Names of indices can be
 treated as regular expressions.}
 }
 \value{
@@ -25,8 +25,8 @@ For a given Elasticsearch index, return the mapping from field name
 }
 \examples{
 \dontrun{
-# get the mapping for all indexed fields in the ticket_sales and customers indexes
+# get the mapping for all indexed fields in the ticket_sales and customers indices
 mappingDT <- retrieve_mapping(es_host = "http://es.custdb.mycompany.com:9200"
-                              , es_indexes = c("ticket_sales", "indexes"))
+                              , es_indices = c("ticket_sales", "customers"))
 }
 }
diff --git a/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
index 74b2958..186e8b3 100644
--- a/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
+++ b/r-pkg/tests/testthat/test-elasticsearch_eda_funs.R
@@ -15,15 +15,15 @@ futile.logger::flog.threshold(0)
 
 #--- 2. get_fields
 
-    # Gives an informative error if es_indexes is NULL or an empty string
-    test_that("get_fields should give an informative error if es_indexes is NULL or an empty string",
+    # Gives an informative error if es_indices is NULL or an empty string
+    test_that("get_fields should give an informative error if es_indices is NULL or an empty string",
               {
                   expect_error(get_fields(es_host = "http://es.custdb.mycompany.com:9200"
-                                          , es_indexes = NULL),
-                               regexp = "get_fields must be passed a valid es_indexes")
+                                          , es_indices = NULL),
+                               regexp = "get_fields must be passed a valid es_indices")
                   expect_error(get_fields(es_host = "http://es.custdb.mycompany.com:9200"
-                                          , es_indexes = ''),
-                               regexp = "get_fields must be passed a valid es_indexes")
+                                          , es_indices = ''),
+                               regexp = "get_fields must be passed a valid es_indices")
               }
     )
     
@@ -40,7 +40,7 @@ futile.logger::flog.threshold(0)
                       `uptasticsearch::.get_aliases` = function(...) {return(aliasDT)},
                       {
                           outDT <- get_fields(es_host = 'http://db.mycompany.com:9200'
-                                              , es_indexes = c('company', 'hotel'))
+                                              , es_indices = c('company', 'hotel'))
                           data.table::setkey(outDT, NULL)
                           expected <- data.table::data.table(
                               index = c(rep('alias1', 3), rep('hotel', 5))
@@ -75,8 +75,8 @@ futile.logger::flog.threshold(0)
               }
     )
     
-    # works if multiple indexes are passed
-    test_that(".flatten_mapping should work if the mapping for multiple indexes are provided",
+    # works if multiple indices are passed
+    test_that(".flatten_mapping should work if the mapping for multiple indices are provided",
               {
                   test_json <- system.file("testdata", "two_index_mapping.json", package = "uptasticsearch")
                   mapping <- jsonlite::fromJSON(txt = test_json)

From 944d5e1285d22a0465697d62fbc00002500923e1 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Fri, 28 Jul 2017 07:39:57 -0700
Subject: [PATCH 11/13] fixed typo in roxygen documentation

---
 r-pkg/R/elasticsearch_eda_funs.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index 00061c0..a587e87 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -125,8 +125,8 @@ get_counts <- function(field
 #'                the form \code{[transfer_protocol][hostname]:[port]}. For example,
 #'                \code{'http://myindex.thing.com:9200'}.
 #' @param es_indices A character vector that contains the names of indices for
-#'                   which to get mappings. Default, is \code{'_all'}, which means
-#'                   get the mapping for all indices Names of indices can be
+#'                   which to get mappings. Default is \code{'_all'}, which means
+#'                   get the mapping for all indices. Names of indices can be
 #'                   treated as regular expressions.
 #' @export
 #' @return A data.table containing four columns: index, type, field, and data_type

From 055982d51b6a815406a8425e51666726971c1059 Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Fri, 28 Jul 2017 14:24:18 -0700
Subject: [PATCH 12/13] simplified process_alias to use read.table instead of
 string parsing

---
 r-pkg/NAMESPACE                  |  3 +--
 r-pkg/R/elasticsearch_eda_funs.R | 21 ++++++---------------
 r-pkg/R/uptasticsearch.R         |  2 ++
 r-pkg/man/get_fields.Rd          |  4 ++--
 4 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/r-pkg/NAMESPACE b/r-pkg/NAMESPACE
index b741b4a..2b7aab3 100644
--- a/r-pkg/NAMESPACE
+++ b/r-pkg/NAMESPACE
@@ -34,8 +34,7 @@ importFrom(purrr,simplify)
 importFrom(purrr,transpose)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
-importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
-importFrom(stringr,str_split)
 importFrom(stringr,str_split_fixed)
+importFrom(utils,read.table)
 importFrom(uuid,UUIDgenerate)
diff --git a/r-pkg/R/elasticsearch_eda_funs.R b/r-pkg/R/elasticsearch_eda_funs.R
index a587e87..50556da 100644
--- a/r-pkg/R/elasticsearch_eda_funs.R
+++ b/r-pkg/R/elasticsearch_eda_funs.R
@@ -238,21 +238,12 @@ get_fields <- function(es_host
 
 # [title] Process the string returned by the GET alias API into a data table
 # [alias_string] A string returned by the alias API with index and alias name
-#' @importFrom stringr str_replace str_split
-#' @importFrom data.table as.data.table setnames
+#' @importFrom data.table data.table
+#' @importFrom utils read.table
 .process_alias <- function(alias_string) {
-    # remove the new line at the end of the string, if it exists
-    aliasString <- stringr::str_replace(alias_string, '\n$', '')
+    # process the string provided by the /_cat/aliases API into a data.frame and then a data.table
+    aliasDT <- data.table::data.table(utils::read.table(text = alias_string, stringsAsFactors = FALSE))
     
-    # split each entry, separated by a new line, into a vector in a list
-    aliases <- stringr::str_split(aliasString, '\n')[[1]]
-    
-    # remove white space and only take the first two entries
-    aliases <- stringr::str_split(aliases, '\\s+')
-    aliases <- lapply(aliases, function(pair) pair[1:2])
-    
-    # create a data table from the resulting list
-    aliasDT <- data.table::as.data.table(matrix(unlist(aliases), byrow = TRUE, ncol = 2))
-    data.table::setnames(aliasDT, old = colnames(aliasDT), new = c('alias', 'index'))
-    return(aliasDT)
+    # return only the first two columns
+    return(aliasDT[, .(alias = V1, index = V2)])
 }
diff --git a/r-pkg/R/uptasticsearch.R b/r-pkg/R/uptasticsearch.R
index b3497dd..75c9262 100644
--- a/r-pkg/R/uptasticsearch.R
+++ b/r-pkg/R/uptasticsearch.R
@@ -10,4 +10,6 @@ utils::globalVariables(c('.'
                          , '.id'
                          , 'field'
                          , 'index'
+                         , 'V1'
+                         , 'V2'
                        ))
\ No newline at end of file
diff --git a/r-pkg/man/get_fields.Rd b/r-pkg/man/get_fields.Rd
index 3d32dd1..fe558aa 100644
--- a/r-pkg/man/get_fields.Rd
+++ b/r-pkg/man/get_fields.Rd
@@ -12,8 +12,8 @@ the form \code{[transfer_protocol][hostname]:[port]}. For example,
 \code{'http://myindex.thing.com:9200'}.}
 
 \item{es_indices}{A character vector that contains the names of indices for
-which to get mappings. Default, is \code{'_all'}, which means
-get the mapping for all indices Names of indices can be
+which to get mappings. Default is \code{'_all'}, which means
+get the mapping for all indices. Names of indices can be
 treated as regular expressions.}
 }
 \value{

From e392bfaca3bb11540903a9f1cd24810b8c0363af Mon Sep 17 00:00:00 2001
From: Michael Frasco <mfrasco6@gmail.com>
Date: Fri, 28 Jul 2017 14:26:42 -0700
Subject: [PATCH 13/13] adding utils to the import section in DESCRIPTION

---
 r-pkg/DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/r-pkg/DESCRIPTION b/r-pkg/DESCRIPTION
index 6675d48..c98d5fe 100644
--- a/r-pkg/DESCRIPTION
+++ b/r-pkg/DESCRIPTION
@@ -26,7 +26,8 @@ Imports:
     jsonlite,
     purrr,
     stringr,
-    uuid
+    uuid,
+    utils
 Suggests:
     knitr,
     testthat,