Skip to content

Commit

Permalink
Merge pull request #51 from TyShkan/feature_output_objects
Browse files Browse the repository at this point in the history
Add new output type object
  • Loading branch information
ets authored Jan 6, 2024
2 parents 3f6bcb6 + 58aecbd commit 330cfbb
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
4 changes: 2 additions & 2 deletions tap_spreadsheets_anywhere/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
Optional('prefer_schema_as_string'): bool,
Optional('schema_overrides'): {
str: {
Required('type'): Any(Any('null','string','integer','number','date-time'),
[Any('null','string','integer','number','date-time')])
Required('type'): Any(Any('null','string','integer','number','date-time','object'),
[Any('null','string','integer','number','date-time','object')])
}
}
}]
Expand Down
10 changes: 10 additions & 0 deletions tap_spreadsheets_anywhere/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytz
import logging
import pickle
from collections.abc import MutableMapping

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -71,6 +72,13 @@ def convert(datum, desired_type=None):
except (ValueError, TypeError):
pass

if desired_type in (None, 'object'):
try:
if isinstance(datum, MutableMapping):
return datum, 'object'
except (ValueError, TypeError):
pass

return str(datum), 'string',


Expand Down Expand Up @@ -117,6 +125,8 @@ def pick_datatype(counts,prefer_number_vs_integer=False):
to_return = 'number'
elif counts.get('date-time', 0) > 0:
to_return = 'date-time'
elif counts.get('object', 0) > 0:
to_return = 'object'
elif counts.get('string', 0) <= 0:
LOGGER.warning(f"Unexpected data type encountered in histogram {counts}. Defaulting type to String.")

Expand Down
17 changes: 17 additions & 0 deletions tap_spreadsheets_anywhere/test/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ def test_convert(self):
# strings
self.assertEqual(convert('4 o clock'), ('4 o clock', 'string'))

def test_convert_objects(self):
    # A string that merely looks like a mapping must be reported as a string.
    self.assertEqual(convert("{'k': 'v','k': 'v'}"), ("{'k': 'v','k': 'v'}", 'string'))

    # Use distinct keys: a dict literal that repeats a key silently collapses
    # to a single entry, so a multi-key mapping would never be exercised.
    mapping = {'k1': 'v1', 'k2': 'v2'}
    expected = (dict(mapping), 'object')

    # Type inferred when no desired type is given.
    self.assertEqual(convert(mapping), expected)
    # Type explicitly requested as 'object'.
    self.assertEqual(convert(mapping, 'object'), expected)

def test_count_sample(self):
self.assertEqual(
count_sample({'id': '1', 'first_name': 'Connor'}),
Expand All @@ -64,6 +69,11 @@ def test_pick_datatype(self):
'number': 1}), 'string')
self.assertEqual(pick_datatype({}), 'string')

def test_pick_datatype_objects(self):
    # A histogram containing only objects resolves to 'object'.
    objects_only = {'object': 1}
    self.assertEqual(pick_datatype(objects_only), 'object')

    # When strings were seen alongside objects the result is 'string'.
    strings_and_objects = {'string': 1, 'object': 1}
    self.assertEqual(pick_datatype(strings_and_objects), 'string')

def test_generate_schema(self):
self.assertEqual(
generate_schema([{'id': '1', 'first_name': 'Connor'},
Expand All @@ -88,3 +98,10 @@ def test_generate_schema(self):
{'id': '2', 'date': '2017-01-02'}]),
{'id': {'type': ['null', 'integer'],},
'date': {'type': ['null', 'string'],}})

def test_generate_schema_objects(self):
    # Every sample row carries a nested mapping under 'obj'.
    rows = [
        {'id': '1', 'obj': {'date': '2017-01-01', 'count': 100}},
        {'id': '2', 'obj': {'date': '2017-01-01', 'count': 0}},
    ]
    expected_schema = {
        'id': {'type': ['null', 'integer']},
        'obj': {'type': ['null', 'object']},
    }
    self.assertEqual(generate_schema(rows), expected_schema)

0 comments on commit 330cfbb

Please sign in to comment.