Skip to content

Commit

Permalink
🚨🚨 Source AirTable: parse formula to simple data types (#31181)
Browse files Browse the repository at this point in the history
Co-authored-by: artem1205 <[email protected]>
  • Loading branch information
artem1205 and artem1205 authored Oct 9, 2023
1 parent 64681f6 commit 343c9be
Show file tree
Hide file tree
Showing 9 changed files with 319 additions and 94 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-airtable/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ COPY source_airtable ./source_airtable
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=3.0.1
LABEL io.airbyte.version=4.0.0
LABEL io.airbyte.name=airbyte/source-airtable

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 14c6e7ea-97ed-4f5e-a7b5-25e9a80b8212
dockerImageTag: 3.0.1
dockerImageTag: 4.0.0
dockerRepository: airbyte/source-airtable
githubIssueLabel: source-airtable
icon: airtable.svg
Expand All @@ -21,6 +21,11 @@ data:
documentationUrl: https://docs.airbyte.com/integrations/sources/airtable
tags:
- language:python
releases:
breakingChanges:
4.0.0:
message: This release introduces changes to columns with formula to parse values directly from `array` to `string` or `number` (where it is possible). Users should refresh the source schema and reset affected streams after upgrading to ensure uninterrupted syncs.
upgradeDeadline: "2023-10-23"
ab_internal:
sl: 200
ql: 400
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ class SchemaTypes:
"rollup": SchemaTypes.array_with_any,
}

# Airtable formula function names that the schema logic treats as array-producing:
# a `formula` field whose formula text starts with one of these prefixes is not
# narrowed to a plain string type.
ARRAY_FORMULAS = (
    "ARRAYCOMPACT",
    "ARRAYFLATTEN",
    "ARRAYUNIQUE",
    "ARRAYSLICE",
)


class SchemaHelpers:
@staticmethod
Expand Down Expand Up @@ -106,7 +108,11 @@ def get_json_schema(table: Dict[str, Any]) -> Dict[str, str]:
# Other edge cases, if `field_type` not in SIMPLE_AIRTABLE_TYPES, fall back to "simpleText" == `string`
# reference issue: https://github.com/airbytehq/oncall/issues/1432#issuecomment-1412743120
if complex_type == SchemaTypes.array_with_any:
if field_type in SIMPLE_AIRTABLE_TYPES:
if original_type == "formula" and field_type in ("number", "currency", "percent", "duration"):
complex_type = SchemaTypes.number
elif original_type == "formula" and not any((options.get("formula").startswith(x) for x in ARRAY_FORMULAS)):
complex_type = SchemaTypes.string
elif field_type in SIMPLE_AIRTABLE_TYPES:
complex_type["items"] = deepcopy(SIMPLE_AIRTABLE_TYPES.get(field_type))
else:
complex_type["items"] = SchemaTypes.string
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"$schema": "https://json-schema.org/draft-07/schema#",
"additionalProperties": true,
"properties": {
"_airtable_created_time": {
"type": ["null", "string"]
},
"_airtable_id": {
"type": ["null", "string"]
},
"assignee_(from_table_6)": {
"items": {
"type": ["null", "number"]
},
"type": ["null", "array"]
},
"barcode": {
"type": ["null", "string"]
},
"float": {
"type": ["null", "number"]
},
"formula_1": {
"type": ["null", "number"]
},
"formula_2_(array)": {
"items": {
"type": ["null", "string"]
},
"type": ["null", "array"]
},
"formula_3_simple_text": {
"type": ["null", "string"]
},
"integer": {
"type": ["null", "number"]
},
"name": {
"type": ["null", "string"]
},
"status": {
"type": ["null", "string"]
},
"table_6": {
"items": {
"type": ["null", "string"]
},
"type": ["null", "array"]
}
},
"type": "object"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
{
"id": "tblSXpxKHg0OiLxbI",
"name": "Table 1",
"primaryFieldId": "some_id",
"fields": [
{
"type": "singleLineText",
"id": "some_id",
"name": "Name"
},
{
"type": "formula",
"options": {
"isValid": true,
"formula": "1+1",
"referencedFieldIds": [],
"result": {
"type": "number",
"options": {
"precision": 0
}
}
},
"id": "fldG2UVGl3hEglGJq",
"name": "Formula 1"
},
{
"type": "formula",
"options": {
"isValid": true,
"formula": "ARRAYFLATTEN(1,2,3)",
"referencedFieldIds": [],
"result": {
"type": "string",
"options": {
"precision": 0
}
}
},
"id": "fldG2UVGl3hEg123Jq",
"name": "Formula 2 (array)"
},

{
"type": "formula",
"options": {
"isValid": true,
"formula": "CONCAT(1,2,3)",
"referencedFieldIds": [],
"result": {
"type": "string",
"options": {
"precision": 0
}
}
},
"id": "fldG2UVGl3hEg123Jq",
"name": "Formula 3 simple text"
},
{
"type": "singleSelect",
"options": {
"choices": [
{
"id": "seleFjJiXuyLDHNUM",
"name": "Todo",
"color": "redLight2"
},
{
"id": "selskIXEljPBrKHLz",
"name": "In progress",
"color": "yellowLight2"
},
{
"id": "selg4w4LeypED2gpW",
"name": "Done",
"color": "greenLight2"
}
]
},
"id": "fldpwaVKzdfcHe2YV",
"name": "Status"
},
{
"type": "number",
"options": {
"precision": 1
},
"id": "fldZNvmdvMZymPxUc",
"name": "Float"
},
{
"type": "number",
"options": {
"precision": 0
},
"id": "fldsjTjrIkKTv6KjG",
"name": "Integer"
},
{
"type": "barcode",
"id": "fld899obV6ycadgWS",
"name": "Barcode"
},
{
"type": "multipleRecordLinks",
"options": {
"linkedTableId": "tblSXpxKHg0OiLxbI",
"isReversed": false,
"prefersSingleRecordLink": false
},
"id": "fldMkP8CfDgqc5r3j",
"name": "Table 6"
},
{
"type": "multipleLookupValues",
"options": {
"isValid": true,
"recordLinkFieldId": "fldMkP8CfDgqc5r3j",
"fieldIdInLinkedTable": "fldG2UVGl3hEglGJq",
"result": {
"type": "number",
"options": {
"precision": 0
}
}
},
"id": "fldLQOO0P7mB4lm7x",
"name": "Assignee (from Table 6)"
}
],
"views": [
{
"id": "viwadxF7ds4lOBf3R",
"name": "Grid view",
"type": "grid"
}
]
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import json
import os
from typing import Any, Mapping

from source_airtable.schema_helpers import SchemaHelpers


# HELPERS
def load_file(file_name: str) -> Mapping[str, Any]:
    """Load a JSON fixture that lives next to this test module.

    :param file_name: name of the JSON file, relative to this module's directory
        (an absolute path is used as given).
    :return: the parsed JSON document.
    :raises FileNotFoundError: if the fixture does not exist.
    :raises json.JSONDecodeError: if the file is not valid JSON.
    """
    fixture_path = os.path.join(os.path.dirname(__file__), file_name)
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(fixture_path, "r", encoding="utf-8") as data:
        return json.load(data)


def test_clean_name(field_name_to_cleaned, expected_clean_name):
    """Check that `SchemaHelpers.clean_name` returns the expected cleaned name for the given input."""
    cleaned = SchemaHelpers.clean_name(field_name_to_cleaned)
    assert expected_clean_name == cleaned

Expand All @@ -20,3 +28,12 @@ def test_get_airbyte_stream(table, expected_json_schema):
assert stream
assert stream.name == table
assert stream.json_schema == expected_json_schema


def test_table_with_formulas():
    """Check the JSON schema generated for the sample table with formula fields against the stored expected schema."""
    sample_table = load_file("sample_table_with_formulas.json")
    actual = SchemaHelpers.get_json_schema(sample_table)
    expected = load_file("expected_schema_for_sample_table.json")

    assert actual == expected
4 changes: 4 additions & 0 deletions docs/integrations/sources/airtable-migrations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Airtable Migration Guide

## Upgrading to 4.0.0
Columns with formulas are narrowed from `array` to `string` or `number` where possible. After upgrading, refresh the connection schema, reset the affected streams, and run a sync.
97 changes: 49 additions & 48 deletions docs/integrations/sources/airtable.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,39 +63,39 @@ This source allows you to pull all available tables and bases using `Metadata AP

## Data type map

| Integration Type | Airbyte Type | Nullable |
|:------------------------|:--------------------------------|----------|
| `multipleAttachments` | `string` | Yes |
| `autoNumber` | `string` | Yes |
| `barcode` | `string` | Yes |
| `button` | `string` | Yes |
| `checkbox` | `boolean` | Yes |
| `singleCollaborator` | `string` | Yes |
| `count` | `number` | Yes |
| `createdBy` | `string` | Yes |
| `createdTime` | `datetime`, `format: date-time` | Yes |
| `currency` | `number` | Yes |
| `email` | `string` | Yes |
| `date` | `string`, `format: date` | Yes |
| `duration` | `number` | Yes |
| `lastModifiedBy` | `string` | Yes |
| `lastModifiedTime` | `datetime`, `format: date-time` | Yes |
| `multipleRecordLinks` | `array with strings` | Yes |
| `multilineText` | `string` | Yes |
| `multipleCollaborators` | `array with strings` | Yes |
| `multipleSelects` | `array with strings` | Yes |
| `number` | `number` | Yes |
| `percent` | `number` | Yes |
| `phoneNumber` | `string` | Yes |
| `rating` | `number` | Yes |
| `richText` | `string` | Yes |
| `singleLineText` | `string` | Yes |
| `externalSyncSource` | `string` | Yes |
| `url` | `string` | Yes |
| `formula` | `array with any` | Yes |
| `lookup` | `array with any` | Yes |
| `multipleLookupValues` | `array with any` | Yes |
| `rollup` | `array with any` | Yes |
| Integration Type | Airbyte Type | Nullable |
|:------------------------|:---------------------------------------|----------|
| `multipleAttachments` | `string` | Yes |
| `autoNumber` | `string` | Yes |
| `barcode` | `string` | Yes |
| `button` | `string` | Yes |
| `checkbox` | `boolean` | Yes |
| `singleCollaborator` | `string` | Yes |
| `count` | `number` | Yes |
| `createdBy` | `string` | Yes |
| `createdTime` | `datetime`, `format: date-time` | Yes |
| `currency` | `number` | Yes |
| `email` | `string` | Yes |
| `date` | `string`, `format: date` | Yes |
| `duration` | `number` | Yes |
| `lastModifiedBy` | `string` | Yes |
| `lastModifiedTime` | `datetime`, `format: date-time` | Yes |
| `multipleRecordLinks` | `array with strings` | Yes |
| `multilineText` | `string` | Yes |
| `multipleCollaborators` | `array with strings` | Yes |
| `multipleSelects` | `array with strings` | Yes |
| `number` | `number` | Yes |
| `percent` | `number` | Yes |
| `phoneNumber` | `string` | Yes |
| `rating` | `number` | Yes |
| `richText` | `string` | Yes |
| `singleLineText` | `string` | Yes |
| `externalSyncSource` | `string` | Yes |
| `url` | `string` | Yes |
| `formula` | `string`, `number` or `array with any` | Yes |
| `lookup` | `array with any` | Yes |
| `multipleLookupValues` | `array with any` | Yes |
| `rollup` | `array with any` | Yes |

* All the fields are `nullable` by default, meaning that the field could be empty.
* `array with any` represents an array whose items are one of the other Airtable data types, such as:
Expand All @@ -109,18 +109,19 @@ See information about rate limits [here](https://airtable.com/developers/web/api

## Changelog

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------------|
| 3.0.1 | 2023-05-10 | [25946](https://github.com/airbytehq/airbyte/pull/25946) | Skip stream if it does not appear in catalog |
| 3.0.0 | 2023-03-20 | [22704](https://github.com/airbytehq/airbyte/pull/22704) | Fix for stream name uniqueness |
| 2.0.4 | 2023-03-15 | [24093](https://github.com/airbytehq/airbyte/pull/24093) | Update spec and doc |
| 2.0.3 | 2023-02-02 | [22311](https://github.com/airbytehq/airbyte/pull/22311) | Fix for `singleSelect` types when discovering the schema |
| 2.0.2 | 2023-02-01 | [22245](https://github.com/airbytehq/airbyte/pull/22245) | Fix for empty `result` object when discovering the schema |
| 2.0.1 | 2023-02-01 | [22224](https://github.com/airbytehq/airbyte/pull/22224) | Fixed broken `API Key` authentication |
| 2.0.0 | 2023-01-27 | [21962](https://github.com/airbytehq/airbyte/pull/21962) | Added casting of native Airtable data types to JsonSchema types |
| 1.0.2 | 2023-01-25 | [20934](https://github.com/airbytehq/airbyte/pull/20934) | Added `OAuth2.0` authentication support |
| 1.0.1 | 2023-01-10 | [21215](https://github.com/airbytehq/airbyte/pull/21215) | Fix field names |
| 1.0.0 | 2022-12-22 | [20846](https://github.com/airbytehq/airbyte/pull/20846) | Migrated to Metadata API for dynamic schema generation |
| 0.1.3 | 2022-10-26 | [18491](https://github.com/airbytehq/airbyte/pull/18491) | Improve schema discovery logic |
| 0.1.2 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy |
| 0.1.1 | 2021-12-06 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec |
| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------|
| 4.0.0 | 2023-10-09 | [31181](https://github.com/airbytehq/airbyte/pull/31181) | Additional schema processing for the FORMULA schema type: Convert to simple data types |
| 3.0.1 | 2023-05-10 | [25946](https://github.com/airbytehq/airbyte/pull/25946) | Skip stream if it does not appear in catalog |
| 3.0.0 | 2023-03-20 | [22704](https://github.com/airbytehq/airbyte/pull/22704) | Fix for stream name uniqueness |
| 2.0.4 | 2023-03-15 | [24093](https://github.com/airbytehq/airbyte/pull/24093) | Update spec and doc |
| 2.0.3 | 2023-02-02 | [22311](https://github.com/airbytehq/airbyte/pull/22311) | Fix for `singleSelect` types when discovering the schema |
| 2.0.2 | 2023-02-01 | [22245](https://github.com/airbytehq/airbyte/pull/22245) | Fix for empty `result` object when discovering the schema |
| 2.0.1 | 2023-02-01 | [22224](https://github.com/airbytehq/airbyte/pull/22224) | Fixed broken `API Key` authentication |
| 2.0.0 | 2023-01-27 | [21962](https://github.com/airbytehq/airbyte/pull/21962) | Added casting of native Airtable data types to JsonSchema types |
| 1.0.2 | 2023-01-25 | [20934](https://github.com/airbytehq/airbyte/pull/20934) | Added `OAuth2.0` authentication support |
| 1.0.1 | 2023-01-10 | [21215](https://github.com/airbytehq/airbyte/pull/21215) | Fix field names |
| 1.0.0 | 2022-12-22 | [20846](https://github.com/airbytehq/airbyte/pull/20846) | Migrated to Metadata API for dynamic schema generation |
| 0.1.3 | 2022-10-26 | [18491](https://github.com/airbytehq/airbyte/pull/18491) | Improve schema discovery logic |
| 0.1.2 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy |
| 0.1.1 | 2021-12-06 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec |

0 comments on commit 343c9be

Please sign in to comment.