From e8e7f14fc3bc5f67e5dd94f5e41734469cb85160 Mon Sep 17 00:00:00 2001
From: Braedon Vickers <braedon.vickers@gmail.com>
Date: Tue, 14 Jul 2020 17:37:12 +0800
Subject: [PATCH] Fix nodes stats parsing for AWS managed Elasticsearch
 clusters

AWS doesn't include the path of data directories in the `_nodes/stats`
output. This caused an error when the parser tried to extract the path to
use as a label.

When parsing a list of buckets (like the list of data directories), the
parser will now fall back to using the bucket's position in the list as the
bucket name if the expected bucket name key isn't present.
---
 prometheus_es_exporter/nodes_stats_parser.py | 13 ++++++--
 tests/test_nodes_stats_parser.py             | 33 ++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/prometheus_es_exporter/nodes_stats_parser.py b/prometheus_es_exporter/nodes_stats_parser.py
index cb4d0bd..bc941bf 100644
--- a/prometheus_es_exporter/nodes_stats_parser.py
+++ b/prometheus_es_exporter/nodes_stats_parser.py
@@ -50,8 +50,17 @@ def parse_block(block, metric=None, labels=None):
             elif isinstance(value, list) and key in bucket_list_keys:
                 bucket_name_key = bucket_list_keys[key]
 
-                for n_value in value:
-                    bucket_name = n_value[bucket_name_key]
+                for n, n_value in enumerate(value):
+                    if bucket_name_key in n_value:
+                        bucket_name = n_value[bucket_name_key]
+                    else:
+                        # If the expected bucket name key isn't present, fall back to using the
+                        # bucket's position in the list as the bucket name. It's not guaranteed that
+                        # the buckets will remain in the same order between calls, but it's the best
+                        # option available.
+                        # e.g. For AWS managed Elasticsearch instances, the `path` key is missing
+                        #      from the filesystem `data` directory buckets.
+                        bucket_name = str(n)
                     metrics.extend(parse_block(n_value, metric=metric + [key], labels=merge_dicts_ordered(labels, {bucket_name_key: [bucket_name]})))
 
     return metrics
diff --git a/tests/test_nodes_stats_parser.py b/tests/test_nodes_stats_parser.py
index 3432b7c..8ecc8eb 100644
--- a/tests/test_nodes_stats_parser.py
+++ b/tests/test_nodes_stats_parser.py
@@ -743,6 +743,39 @@ def test_endpoint(self):
         result = convert_result(parse_response(response))
         self.assertEqual(expected, result)
 
+    def test_endpoint_aws(self):
+        # AWS managed Elasticsearch clusters return modified responses to the /_nodes/stats endpoint.
+        # (Response trimmed to data with meaningful differences to usual response)
+        response = {
+            'nodes': {
+                'bRcKq5zUTAuwNf4qvnXzIQ': {
+                    'name': 'bRcKq5z',
+                    'fs': {
+                        'data': [
+                            {
+                                # `path` and `mount` keys missing.
+                                # 'path': '/usr/share/elasticsearch/data/nodes/0',
+                                # 'mount': '/usr/share/elasticsearch/data (/dev/mapper/ubuntu--vg-root)',
+                                'type': 'ext4',
+                                'total_in_bytes': 233134567424,
+                                'free_in_bytes': 92206276608,
+                                'available_in_bytes': 80292356096,
+                                'spins': 'true'
+                            }
+                        ],
+                    },
+                }
+            }
+        }
+
+        expected = {
+            'fs_data_total_in_bytes{node_id="bRcKq5zUTAuwNf4qvnXzIQ",node_name="bRcKq5z",path="0"}': 233134567424,
+            'fs_data_free_in_bytes{node_id="bRcKq5zUTAuwNf4qvnXzIQ",node_name="bRcKq5z",path="0"}': 92206276608,
+            'fs_data_available_in_bytes{node_id="bRcKq5zUTAuwNf4qvnXzIQ",node_name="bRcKq5z",path="0"}': 80292356096,
+        }
+        result = convert_result(parse_response(response))
+        self.assertEqual(expected, result)
+
 
 if __name__ == '__main__':
     unittest.main()