ghdna · mary2501 · Feb 15, 2022 · Sep 17, 2024 · Sep 17, 2024
diff --git a/README.md b/README.md
@@ -142,19 +142,20 @@ const athenaExpress = new AthenaExpress(athenaExpressConfig);
 
 ###### Advance config Parameters:
 
-| Parameter  | Format | Default Value | Description |
-| ------------- | ------------- | ------------- | ------------- |
-| s3 | string  | `athena-express` creates a new bucket for you  | The location in Amazon S3 where your query results are stored, such as `s3://path/to/query/bucket/`. <br /> `athena-express` will create a new bucket for you if you don't provide a value for this param but sometimes that could cause an issue if you had recently deleted a bucket with the same name. (something to do with cache). When that happens, just specify you own bucket name. Alternatively you can also use `workgroup`.   |
-| db | string  | `default`  | Athena database name that the SQL queries should be executed in. When a `db` name is specified in the config, you can execute SQL queries without needing to explicitly mention DB name. e.g. <br />` athenaExpress.query("SELECT * FROM movies LIMIT 3")` <br /> as opposed to <br />` athenaExpress.query({sql: "SELECT * FROM movies LIMIT 3", db: "moviedb"});`  |
-| workgroup | string  | `primary`  | The name of the workgroup in which the query is being started. <br /> Note: athena-express cannot create workgroups (as it includes a lot of configuration) so you will need to create one beforehand IFF you intend to use a non default workgroup. Learn More here. [Setting up Workgroups](https://docs.aws.amazon.com/athena/latest/ug/user-created-workgroups.html) |
-|formatJson  | boolean | `true` |  Override as false if you rather get the raw unformatted output from S3. |
-|retry  | integer | `200` milliseconds| Wait interval between re-checking if the specific Athena query has finished executing |
-|getStats | boolean | `false`| Set `getStats: true` to capture additional metadata for your query, such as: <ul><li>`EngineExecutionTimeInMillis`</li><li>`DataScannedInBytes`</li><li>`TotalExecutionTimeInMillis`</li><li>`QueryQueueTimeInMillis`</li><li>`QueryPlanningTimeInMillis`</li><li>`ServiceProcessingTimeInMillis`</li><li>`DataScannedInMB`</li><li>`QueryCostInUSD`</li><li>`Count`</li><li>`QueryExecutionId`</li><li>`S3Location`</li></ul> |
-|ignoreEmpty  | boolean | `true`| Ignore fields with empty values from the final JSON response.  |
-|encryption | object | -- | [Encryption configuation](https://docs.aws.amazon.com/athena/latest/ug/encryption.html) example usage: <br />`{ EncryptionOption: "SSE_KMS", KmsKey: process.env.kmskey}` |
-|skipResults | boolean | `false` | For a unique requirement where a user may only want to execute the query in Athena and store the results in S3 but NOT fetch those results in that moment. <br />Perhaps to be retrieved later or simply stored in S3 for auditing/logging purposes. <br />To retrieve the results, you can simply pass the `QueryExecutionId` into athena-express as such: `athenaExpress.query("ab493e66-138f-4b78-a187-51f43fd5f0eb")`  |
-|waitForResults  | boolean | `true` | When low latency is the objective, you can skip waiting for a query to be completed in Athena. Returns `QueryExecutionId`, which you can pass into athena-express later as such: `athenaExpress.query("ab493e66-138f-4b78-a187-51f43fd5f0eb")` <br /> Not to be confused with `skipResults`, which actually waits for the query to be completed before returning `QueryExecutionId` and other stats. `waitForResults` is meant for fire-and-forget kind of operations.  <br />  |
-|catalog  | string | `null` | The catalog to which the query results belong  |
+| Parameter      | Format | Default Value                                   | Description |
+|----------------| ------------- |-------------------------------------------------| ------------- |
+| s3             | string  | `athena-express` creates a new bucket for you   | The location in Amazon S3 where your query results are stored, such as `s3://path/to/query/bucket/`. <br /> `athena-express` will create a new bucket for you if you don't provide a value for this param, but sometimes that could cause an issue if you had recently deleted a bucket with the same name (something to do with cache). When that happens, just specify your own bucket name. Alternatively, you can also use `workgroup`.   |
+| db             | string  | `default`                                       | Athena database name that the SQL queries should be executed in. When a `db` name is specified in the config, you can execute SQL queries without needing to explicitly mention DB name. e.g. <br />` athenaExpress.query("SELECT * FROM movies LIMIT 3")` <br /> as opposed to <br />` athenaExpress.query({sql: "SELECT * FROM movies LIMIT 3", db: "moviedb"});`  |
+| workgroup      | string  | `primary`                                       | The name of the workgroup in which the query is being started. <br /> Note: athena-express cannot create workgroups (as it includes a lot of configuration) so you will need to create one beforehand IFF you intend to use a non default workgroup. Learn More here. [Setting up Workgroups](https://docs.aws.amazon.com/athena/latest/ug/user-created-workgroups.html) |
+| formatJson     | boolean | `true`                                          |  Override as `false` if you would rather get the raw unformatted output from S3. |
+| retry          | integer | `200` milliseconds                              | Wait interval between re-checking if the specific Athena query has finished executing |
+| getStats       | boolean | `false`                                         | Set `getStats: true` to capture additional metadata for your query, such as: <ul><li>`EngineExecutionTimeInMillis`</li><li>`DataScannedInBytes`</li><li>`TotalExecutionTimeInMillis`</li><li>`QueryQueueTimeInMillis`</li><li>`QueryPlanningTimeInMillis`</li><li>`ServiceProcessingTimeInMillis`</li><li>`DataScannedInMB`</li><li>`QueryCostInUSD`</li><li>`Count`</li><li>`QueryExecutionId`</li><li>`S3Location`</li></ul> |
+| ignoreEmpty    | boolean | `true`                                          | Ignore fields with empty values from the final JSON response.  |
+| flatKeys       | boolean | `false`                                         | Set `flatKeys: true` to stop dots (`.`) and square brackets in header fields from being interpreted as nested object or array identifiers; they are then treated like regular characters in JSON field identifiers. Default: `false`.  |
+| encryption     | object | --                                              | [Encryption configuration](https://docs.aws.amazon.com/athena/latest/ug/encryption.html) example usage: <br />`{ EncryptionOption: "SSE_KMS", KmsKey: process.env.kmskey}` |
+| skipResults    | boolean | `false`                                         | For a unique requirement where a user may only want to execute the query in Athena and store the results in S3 but NOT fetch those results in that moment. <br />Perhaps to be retrieved later or simply stored in S3 for auditing/logging purposes. <br />To retrieve the results, you can simply pass the `QueryExecutionId` into athena-express as such: `athenaExpress.query("ab493e66-138f-4b78-a187-51f43fd5f0eb")`  |
+| waitForResults | boolean | `true`                                          | When low latency is the objective, you can skip waiting for a query to be completed in Athena. Returns `QueryExecutionId`, which you can pass into athena-express later as such: `athenaExpress.query("ab493e66-138f-4b78-a187-51f43fd5f0eb")` <br /> Not to be confused with `skipResults`, which actually waits for the query to be completed before returning `QueryExecutionId` and other stats. `waitForResults` is meant for fire-and-forget kind of operations.  <br />  |
+| catalog        | string | `null`                                          | The catalog to which the query results belong  |
 
 
 

diff --git a/index.d.ts b/index.d.ts
@@ -9,6 +9,7 @@ declare module 'athena-express' {
         formatJson: boolean,
         retry: number,
         ignoreEmpty: boolean,
+        flatKeys: boolean,
         encryption: Record<string, string>,
         skipResults: boolean,
         waitForResults: boolean,

diff --git a/lib/athenaExpress.js b/lib/athenaExpress.js
@@ -31,6 +31,7 @@ module.exports = class AthenaExpress {
             formatJson: init.formatJson !== false,
             getStats: init.getStats || init.skipResults,
             ignoreEmpty: init.ignoreEmpty !== false,
+            flatKeys: init.flatKeys || false,
             skipResults: init.skipResults,
             waitForResults: init.waitForResults !== false,
             QueryExecutionId: null,

diff --git a/lib/helpers.js b/lib/helpers.js
@@ -116,7 +116,7 @@ async function getQueryResultsFromS3(params) {
     const input = params.config.s3.getObject(s3Params).createReadStream();
     if (params.config.formatJson) {
       return {
-        items: await cleanUpDML(input, params.config.ignoreEmpty),
+        items: await cleanUpDML(input, params.config.ignoreEmpty, params.config.flatKeys),
       };
     } else {
       return { items: await getRawResultsFromS3(input) };
@@ -177,13 +177,14 @@ function getDataTypes() {
   });
 }
 
-async function cleanUpDML(input, ignoreEmpty) {
+async function cleanUpDML(input, ignoreEmpty, flatKeys) {
   let cleanJson = [];
   const dataTypes = await getDataTypes();
   return new Promise(function (resolve) {
     input.pipe(
       csv({
         ignoreEmpty,
+        flatKeys
       })
         .on("data", (data) => {
           cleanJson.push(