Excel DataLink (#9346)

- Adds the Excel format as one of the formats supported when creating a data link.
- The data link can choose to read the file as a workbook, or read a sheet or range from it as a table, like `Excel_Format`.
- Also updates the Delimited format dialog to allow customizing the quote style.
enso-org · Mar 11, 2024 · e98306f · e98306f
1 parent 1f6db1e
commit e98306f
Show file tree

Hide file tree

Showing 20 changed files with 357 additions and 23 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -628,6 +628,7 @@
 - [Added `Xml_Document.write`][9299]
 - [Added `select_by_type` and `remove_by_type` to `Table` and `DB_Table`][9334]
 - [Make File./ only accept Text][9330]
+- [Implemented Excel Data Link][9346]
 
 [debug-shortcuts]:
   https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -909,8 +910,9 @@
 [9249]: https://github.com/enso-org/enso/pull/9249
 [9269]: https://github.com/enso-org/enso/pull/9269
 [9299]: https://github.com/enso-org/enso/pull/9299
-[9344]: https://github.com/enso-org/enso/pull/9344
 [9330]: https://github.com/enso-org/enso/pull/9330
+[9334]: https://github.com/enso-org/enso/pull/9334
+[9346]: https://github.com/enso-org/enso/pull/9346
 
 #### Enso Compiler
 

diff --git a/app/ide-desktop/lib/dashboard/src/data/__tests__/dataLinkSchema.test.ts b/app/ide-desktop/lib/dashboard/src/data/__tests__/dataLinkSchema.test.ts
@@ -66,6 +66,20 @@ v.test('correctly validates example S3 .datalink files with the schema', () => {
   }
 })
 
+v.test('correctly validates example Table .datalink files with the schema', () => {
+  const fileNames = [
+    'example-http-format-excel-workbook.datalink',
+    'example-http-format-excel-sheet.datalink',
+    'example-http-format-excel-range.datalink',
+    'example-http-format-delimited-custom-quote.datalink',
+    'example-http-format-delimited-ignore-quote.datalink',
+  ]
+  for (const fileName of fileNames) {
+    const filePath = path.resolve(TABLE_DATA_LINKS_ROOT, fileName)
+    testSchema(loadDataLinkFile(filePath), fileName)
+  }
+})
+
 v.test('correctly validates example Database .datalink files with the schema', () => {
   const schemas = ['postgres-db.datalink', 'postgres-table.datalink']
   for (const schema of schemas) {

diff --git a/app/ide-desktop/lib/dashboard/src/data/dataLinkSchema.json b/app/ide-desktop/lib/dashboard/src/data/dataLinkSchema.json
@@ -30,6 +30,13 @@
       },
       "required": ["type", "secretPath"]
     },
+    "BooleanOrInfer": {
+      "anyOf": [
+        { "title": "Infer", "const": "infer", "type": "string" },
+        { "title": "True", "const": true, "type": "boolean" },
+        { "title": "False", "const": false, "type": "boolean" }
+      ]
+    },
 
     "AwsAuth": {
       "title": "AWS Authentication",
@@ -174,7 +181,8 @@
       "anyOf": [
         { "$ref": "#/$defs/DefaultFormat" },
         { "$ref": "#/$defs/DelimitedFormat" },
-        { "$ref": "#/$defs/JsonFormat" }
+        { "$ref": "#/$defs/JsonFormat" },
+        { "$ref": "#/$defs/ExcelFormat" }
       ]
     },
     "DefaultFormat": {
@@ -207,10 +215,17 @@
         "headers": {
           "title": "Headers",
           "description": "Whether a header row containing column names is present.",
-          "type": "boolean"
+          "$ref": "#/$defs/BooleanOrInfer"
+        },
+        "quote_style": {
+          "title": "Custom Quotes",
+          "anyOf": [
+            { "$ref": "#/$defs/DelimitedQuoteStyle" },
+            { "$ref": "#/$defs/DelimitedQuoteStyleNo" }
+          ]
         }
       },
-      "required": ["type", "subType", "delimiter"]
+      "required": ["type", "subType", "delimiter", "headers"]
     },
     "JsonFormat": {
       "title": "JSON",
@@ -220,6 +235,124 @@
         "subType": { "title": "Type", "const": "json", "type": "string" }
       },
       "required": ["type", "subType"]
+    },
+    "ExcelFormat": {
+      "title": "Excel",
+      "type": "object",
+      "properties": {
+        "type": { "title": "Type", "const": "format", "type": "string" },
+        "subType": { "title": "Type", "const": "excel", "type": "string" },
+        "section": {
+          "title": "Section",
+          "anyOf": [
+            { "$ref": "#/$defs/ExcelSectionWorkbook" },
+            { "$ref": "#/$defs/ExcelSectionSheet" },
+            { "$ref": "#/$defs/ExcelSectionRange" }
+          ]
+        }
+      },
+      "required": ["type", "subType", "section"]
+    },
+
+    "ExcelSectionWorkbook": {
+      "title": "Workbook",
+      "type": "object",
+      "properties": {
+        "type": { "const": "workbook", "type": "string" },
+        "defaultSheet": {
+          "title": "Default sheet",
+          "$comment": "I want the title to be 'Default sheet (for writing)' to make it clear without hovering, but then the label is too wide. Can we make the modal wider?",
+          "description": "The default sheet used when writing a table to this data link.",
+          "type": "string",
+          "minLength": 1,
+          "default": "EnsoSheet"
+        }
+      },
+      "required": ["type"]
+    },
+    "ExcelSectionSheet": {
+      "title": "Sheet",
+      "type": "object",
+      "properties": {
+        "type": { "const": "sheet", "type": "string" },
+        "name": {
+          "title": "Sheet name",
+          "description": "Must not be blank.",
+          "type": "string",
+          "minLength": 1
+        },
+        "headers": {
+          "title": "Headers",
+          "description": "Whether a header row containing column names is present at the top of the sheet.",
+          "$ref": "#/$defs/BooleanOrInfer"
+        }
+      },
+      "required": ["type", "name", "headers"]
+    },
+    "ExcelSectionRange": {
+      "title": "Range",
+      "type": "object",
+      "properties": {
+        "type": { "const": "range", "type": "string" },
+        "address": {
+          "title": "Address",
+          "description": "Must not be blank.",
+          "type": "string",
+          "minLength": 1,
+          "examples": ["Sheet1!A1:B2"]
+        },
+        "headers": {
+          "title": "Headers",
+          "description": "Whether a header row containing column names is present at the top of the range.",
+          "$ref": "#/$defs/BooleanOrInfer"
+        }
+      },
+      "required": ["type", "address", "headers"]
+    },
+
+    "DelimitedQuoteStyle": {
+      "title": "Custom quotes",
+      "type": "object",
+      "properties": {
+        "type": { "title": "Type", "const": "quote_style", "type": "string" },
+        "subType": {
+          "title": "Subtype",
+          "const": "with_quotes",
+          "type": "string"
+        },
+        "quote": {
+          "title": "Quote",
+          "description": "Must be exactly one character.",
+          "type": "string",
+          "minLength": 1,
+          "maxLength": 1,
+          "default": "\"",
+          "examples": ["\"", "'"]
+        },
+        "escape": {
+          "title": "Escape",
+          "description": "Must be exactly one character. The character that is prepended to the quote character to escape it.",
+          "type": "string",
+          "minLength": 1,
+          "maxLength": 1,
+          "default": "\"",
+          "examples": ["\"", "\\"]
+        }
+      },
+      "required": ["type", "subType", "quote", "escape"]
+    },
+    "DelimitedQuoteStyleNo": {
+      "type": "object",
+      "title": "Ignore quotes",
+      "properties": {
+        "type": { "title": "Type", "const": "quote_style", "type": "string" },
+        "subType": {
+          "title": "Subtype",
+          "const": "no_quotes",
+          "type": "string"
+        }
+      },
+      "required": ["type", "subType"]
     }
   }
 }
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_Path.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_Path.enso
@@ -27,10 +27,13 @@ type Enso_Path
             if raw_segments.is_empty then Error.throw (Illegal_Argument.Error "Invalid path - it should contain at least one segment.") else
                 organization_name = raw_segments.first
                 segments = raw_segments.drop 1 . filter s-> s.is_empty.not
-                if organization_name != Enso_User.current.name then Error.throw (Unimplemented.throw "Currently only resolving paths for the current user is supported.") else
-                    if segments.is_empty then Enso_Path.Value organization_name [] Nothing else
-                        asset_name = segments.last
-                        Enso_Path.Value organization_name (segments.drop (Index_Sub_Range.Last 1)) asset_name
+                current_user_name = Enso_User.current.name
+                # The `if_not_error` is a workaround for https://github.com/enso-org/enso/issues/9283 and it can be removed after that is fixed.
+                current_user_name.if_not_error <|
+                    if organization_name != current_user_name then Unimplemented.throw "Currently only resolving paths for the current user is supported." else
+                        if segments.is_empty then Enso_Path.Value organization_name [] Nothing else
+                            asset_name = segments.last
+                            Enso_Path.Value organization_name (segments.drop (Index_Sub_Range.Last 1)) asset_name
 
     ## PRIVATE
     resolve_parent self =

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso
@@ -2,6 +2,7 @@ import project.Any.Any
 import project.Data.Json.JS_Object
 import project.Data.Json.Json
 import project.Data.Numbers.Integer
+import project.Data.Text.Case.Case
 import project.Data.Text.Encoding.Encoding
 import project.Data.Text.Text
 import project.Data.Vector.Vector
@@ -245,3 +246,17 @@ JSON_Format.from (that : JS_Object) =
 
 ## A setting to infer the default behaviour of some option.
 type Infer
+
+
+## PRIVATE
+   Parses the JSON encoding of a `Boolean | Infer` setting.
+   A missing value (`Nothing`) is read as `Infer`; text is matched ignoring case.
+   Any other text yields an `Illegal_Argument` error that names `field_name`.
+parse_boolean_with_infer (field_name : Text) (value : Boolean | Text | Nothing) -> Boolean | Infer = case value of
+    Nothing -> Infer
+    flag : Boolean -> flag
+    text : Text -> case text.to_case Case.Lower of
+        "true" -> True
+        "false" -> False
+        "infer" -> Infer
+        _ -> Error.throw (Illegal_Argument.Error ("The field `"+field_name+"` must be a boolean or the string `infer`."))
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso
@@ -6,6 +6,7 @@ import Standard.Base.System.File_Format_Metadata.File_Format_Metadata
 import Standard.Base.System.Input_Stream.Input_Stream
 from Standard.Base.Metadata.Choice import Option
 from Standard.Base.Widget_Helpers import make_file_read_delimiter_selector
+from Standard.Base.System.File_Format import parse_boolean_with_infer
 
 import project.Data.Data_Formatter.Data_Formatter
 import project.Data.Match_Columns.Match_Columns
@@ -169,14 +170,17 @@ Delimited_Format.from (that : JS_Object) =
     encoding = encoding_name
         . if_not_nothing (Encoding.from_name encoding_name)
         . if_nothing Encoding.utf_8
-    headers = that.get "headers" . if_nothing Infer
+    headers = that.get "headers" |> parse_boolean_with_infer "headers"
     skip_rows = that.get "skip_rows" . if_nothing 0
     row_limit = that.get "row_limit"
     keep_invalid_rows = that.get "keep_invalid_rows" . if_nothing True
+    quote_style = case that.get "quote_style" of
+        Nothing -> Quote_Style.With_Quotes
+        json -> Quote_Style.from json
 
-    unsupported_fields = ["quote_style", "value_formatter", "line_endings", "comment_character"]
+    unsupported_fields = ["value_formatter", "line_endings", "comment_character"]
     case unsupported_fields.find that.contains_key if_missing=Nothing of
         Nothing ->
-            Delimited_Format.Delimited delimiter=delimiter encoding=encoding headers=headers skip_rows=skip_rows row_limit=row_limit keep_invalid_rows=keep_invalid_rows
+            Delimited_Format.Delimited delimiter=delimiter encoding=encoding headers=headers skip_rows=skip_rows row_limit=row_limit quote_style=quote_style keep_invalid_rows=keep_invalid_rows
         field ->
             Error.throw (Illegal_Argument.Error ("The field `" ++ field ++ "` is currently not supported when deserializing the Delimited format from JSON."))
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Quote_Style.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Quote_Style.enso
@@ -1,4 +1,5 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 
 type Quote_Style
     ## Does not handle quotes at all.
@@ -30,4 +31,16 @@ type Quote_Style
          The quote and escape characters must consist of exactly one code-point
          (i.e. it can be only one character and complex characters like emojis
          may not be used).
-    With_Quotes (always_quote : Boolean = False) (quote : Text = '"') (quote_escape : Text = '"')
+    With_Quotes (always_quote : Boolean = False) (quote : Text = '"') (quote_escape : Text = quote)
+
+## PRIVATE
+   Builds a `Quote_Style` from its JSON form, selected by the `subType` field.
+Quote_Style.from (that : JS_Object) =
+    kind = that.get "subType" if_missing=(Error.throw (Illegal_Argument.Error "Missing `subType` field in quote style."))
+    case kind.to_case Case.Lower of
+        "with_quotes" ->
+            quote_char = that.get "quote" if_missing='"'
+            escape_char = that.get "escape" if_missing=quote_char
+            Quote_Style.With_Quotes always_quote=(that.get "alwaysQuote" if_missing=False) quote=quote_char quote_escape=escape_char
+        "no_quotes" -> Quote_Style.No_Quotes
+        _ -> Error.throw (Illegal_Argument.Error ("Unknown quote style: " + kind))
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso
@@ -6,6 +6,7 @@ import Standard.Base.System.File_Format_Metadata.File_Format_Metadata
 import Standard.Base.System.Input_Stream.Input_Stream
 from Standard.Base.Metadata.Choice import Option
 from Standard.Base.Metadata.Widget import Text_Input, Numeric_Input
+from Standard.Base.System.File_Format import parse_boolean_with_infer
 
 import project.Data.Match_Columns.Match_Columns
 import project.Data.Table.Table
@@ -189,3 +190,22 @@ as_section (format : Excel_Format) -> Excel_Section = case format of
         Excel_Section.Worksheet sheet headers skip_rows row_limit
     Excel_Format.Range address headers skip_rows row_limit _ ->
         Excel_Section.Cell_Range address headers skip_rows row_limit
+
+## PRIVATE
+   Constructs an `Excel_Format` instance from JSON; only the `section` options are currently supported.
+   The workbook sheet is read from `defaultSheet` (the schema's key), falling back to the older `default_sheet`.
+Excel_Format.from (that : JS_Object) =
+    section = that.get "section" if_missing=(Error.throw (Illegal_Argument.Error "The `section` field is required."))
+    case section.get "type" if_missing=(Error.throw (Illegal_Argument.Error "The `section.type` field is required.")) . to_case Case.Lower of
+        "workbook" ->
+            default_sheet = section.get "defaultSheet" if_missing=(section.get "default_sheet" if_missing="EnsoSheet")
+            Excel_Format.Workbook default_sheet=default_sheet
+        "sheet" ->
+            name = section.get "name" if_missing=(Error.throw (Illegal_Argument.Error "The `name` field is required."))
+            headers = section.get "headers" |> parse_boolean_with_infer "headers"
+            Excel_Format.Sheet sheet=name headers=headers
+        "range" ->
+            address = section.get "address" if_missing=(Error.throw (Illegal_Argument.Error "The `address` field is required."))
+            headers = section.get "headers" |> parse_boolean_with_infer "headers"
+            Excel_Format.Range address=address headers=headers
+        unknown -> Error.throw (Illegal_Argument.Error ("The `section.type` was "+unknown+" but it must be one of `workbook`, `sheet`, or `range`."))
diff --git a/std-bits/base/src/main/java/org/enso/base/file_format/FileFormatSPI.java b/std-bits/base/src/main/java/org/enso/base/file_format/FileFormatSPI.java
@@ -22,7 +22,7 @@ public static Value findFormatForDataLinkSubType(String subType) {
 
     var providers =
         loader.stream()
-            .filter(provider -> subType.equals(provider.get().getDataLinkFormatName()))
+            .filter(provider -> subType.equalsIgnoreCase(provider.get().getDataLinkFormatName()))
             .toList();
     if (providers.isEmpty()) {
       return null;

diff --git a/std-bits/table/src/main/java/org/enso/table/read/ExcelFormatSPI.java b/std-bits/table/src/main/java/org/enso/table/read/ExcelFormatSPI.java
@@ -13,4 +13,9 @@ protected String getModuleName() {
   protected String getTypeName() {
     return "Excel_Format";
   }
+
+  @Override
+  protected String getDataLinkFormatName() {
+    return "excel"; // FileFormatSPI.findFormatForDataLinkSubType compares this to the subType, ignoring case
+  }
 }
diff --git a/test/AWS_Tests/data/format-delimited.datalink b/test/AWS_Tests/data/format-delimited.datalink
@@ -10,6 +10,6 @@
     "type": "format",
     "subType": "delimited",
     "delimiter": " ",
-    "headers": false
+    "headers": "infer"
   }
 }
diff --git a/test/Table_Tests/data/datalinks/example-http-format-delimited-custom-quote.datalink b/test/Table_Tests/data/datalinks/example-http-format-delimited-custom-quote.datalink
@@ -0,0 +1,18 @@
+{
+    "type": "HTTP",
+    "libraryName": "Standard.Base",
+    "method": "GET",
+    "uri": "http://http-test-helper.local/testfiles/table.tsv",
+    "format": {
+        "type": "format",
+        "subType": "delimited",
+        "delimiter": "\t",
+        "headers": true,
+        "quote_style": {
+            "type": "quote_style",
+            "subType": "with_quotes",
+            "quote": "'",
+            "escape": "\\"
+        }
+    }
+}
diff --git a/test/Table_Tests/data/datalinks/example-http-format-delimited-ignore-quote.datalink b/test/Table_Tests/data/datalinks/example-http-format-delimited-ignore-quote.datalink
@@ -0,0 +1,16 @@
+{
+    "type": "HTTP",
+    "libraryName": "Standard.Base",
+    "method": "GET",
+    "uri": "http://http-test-helper.local/testfiles/js.txt",
+    "format": {
+        "type": "format",
+        "subType": "delimited",
+        "delimiter": " ",
+        "headers": false,
+        "quote_style": {
+            "type": "quote_style",
+            "subType": "no_quotes"
+        }
+    }
+}