Skip to content

Commit

Permalink
v0.4.13 improve override schema
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Mar 13, 2024
1 parent 3690f4e commit d36c653
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 13 deletions.
7 changes: 7 additions & 0 deletions data/beatles_drumkits.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
beatles number of drum kits in the band

name,# drum kits
john,N/A
paul,N/A
george,N/A
ringo,1
2 changes: 1 addition & 1 deletion dataflows/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-0.4.12
+0.4.13
2 changes: 1 addition & 1 deletion dataflows/processors/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def safe_process_datapackage(self, dp: Package):
'Found duplicate headers.' +
'Use the `deduplicate_headers` flag (found headers=%r)' % stream.headers)
stream.headers = self.rename_duplicate_headers(stream.headers)
-schema = Schema().infer(
+schema = Schema(self.override_schema).infer(
stream.sample, headers=stream.headers,
confidence=1, guesser_cls=self.guesser)
# restore schema field names to original headers
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def read(*paths):
INSTALL_REQUIRES = [
'dataflows-tabulator>=1.54.0',
'datapackage>=1.15.4',
-'tableschema>=1.20.7',
+'tableschema>=1.20.9',
'kvfile>=0.0.9',
'click',
'jinja2',
Expand Down
20 changes: 10 additions & 10 deletions tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1746,10 +1746,10 @@ def test_set_type_regex():
def test_load_override_schema():
from dataflows import load
flow = Flow(
-load('data/beatles_age.csv',
+load('data/beatles_drumkits.csv',
override_schema={
'title': 'title',
-'missingValues': ['ringo'],
+'missingValues': ['N/A'],
}
),
)
Expand All @@ -1758,24 +1758,24 @@ def test_load_override_schema():
'profile': 'data-package',
'resources': [{
'format': 'csv',
-'name': 'beatles_age',
-'path': 'beatles_age.csv',
+'name': 'beatles_drumkits',
+'path': 'beatles_drumkits.csv',
'profile': 'tabular-data-resource',
'schema': {
'fields': [
{'format': 'default', 'name': 'name', 'type': 'string'},
-{'format': 'default', 'name': 'age', 'type': 'integer'}
+{'format': 'default', 'name': '# drum kits', 'type': 'integer'}
],
-'missingValues': ['ringo'],
+'missingValues': ['N/A'],
'title': 'title'
}
}]
}
assert data == [[
-{'name': 'john', 'age': 18},
-{'name': 'paul', 'age': 16},
-{'name': 'george', 'age': 17},
-{'name': None, 'age': 22},
+{'name': 'john', '# drum kits': None},
+{'name': 'paul', '# drum kits': None},
+{'name': 'george', '# drum kits': None},
+{'name': 'ringo', '# drum kits': 1},
]]


Expand Down

0 comments on commit d36c653

Please sign in to comment.