Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: allow headers to be capitalized TASK-1174 #332

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
# Install testing / development requirements
coverage[toml]==6.5.0
coveralls==3.3.1
ddt==1.7.2
flake8==7.1.1
funcsigs==1.0.2
geojson-rewind==1.1.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@

setup(
name='formpack',
version='3.0.0',
version='3.0.1',
description='Manipulation tools for KoBo forms',
author='the formpack contributors (https://github.com/kobotoolbox/formpack/graphs/contributors)',
url='https://github.com/kobotoolbox/formpack/',
77 changes: 77 additions & 0 deletions src/formpack/utils/expand_content.py
Original file line number Diff line number Diff line change
@@ -94,7 +94,84 @@ def _get_translations_from_special_cols(
return translations, set(translated_cols)


def clean_column_name(
    column_name: str, already_seen: 'dict[str, str] | None' = None
) -> str:
    """
    Lower-case the recognized leading part of a column header.

    Only the column "short name" (plus the optional ``media`` prefix) is
    lower-cased. Everything after it is returned verbatim, which preserves
    ":" vs "::", any spaces around the colons, and the casing of whatever
    follows the separator.

    Examples:
        "LaBeL"            -> "label"
        "Bind:Some:Thing"  -> "bind:Some:Thing"
        "Media:Audio::ES"  -> "media:audio::ES"
        "Media: AuDiO"     -> "media: audio"
        "LABEL : SPANISH"  -> "label : SPANISH"
        "FOO"              -> "foo"

    Args:
        column_name: the header as it appears in the sheet.
        already_seen: optional memo mapping raw headers to their cleaned
            form. It is consulted first and updated in place. When omitted,
            nothing is remembered between calls.

    Returns:
        The cleaned column name.
    """
    if already_seen is None:
        already_seen = {}
    if column_name in already_seen:
        return already_seen[column_name]

    cleaned = _lowercase_column_prefix(column_name)
    already_seen[column_name] = cleaned
    return cleaned


def _lowercase_column_prefix(column_name: str) -> str:
    """Return `column_name` with its recognized leading part lower-cased."""
    lowered = column_name.lower()

    # "LaBeL" -> "label", "HiNT" -> "hint"
    if lowered in ('label', 'hint'):
        return lowered

    # "Bind:Some:Thing" -> "bind:Some:Thing", "BodY:" -> "body:"
    match = re.match(r'^(bind|body):', column_name, flags=re.IGNORECASE)

    if match is None:
        # "Media:Audio::ES" -> "media:audio::ES", "ViDeO : ES" -> "video : ES"
        # "Media: AuDiO" -> "media: audio", "VIDEO" -> "video"
        # Group 1 is the media type; the optional `media` prefix sits in
        # front of it, so lower-casing up to the end of group 1 covers both.
        media_types = '|'.join(re.escape(name) for name in MEDIA_COLUMN_NAMES)
        match = re.match(
            rf'^(?:media\s*::?\s*)?({media_types})(?:\s*::?\s*[^:]+)?$',
            column_name,
            flags=re.IGNORECASE,
        )

    if match is None:
        # example: label::x, constraint_message::x, hint::x
        match = re.match(r'^([^:]+)\s*::?\s*[^:]+$', column_name)

    if match is None:
        return lowered

    # Slice instead of using `re.sub` with an interpolated replacement: a
    # backslash in the header would otherwise be read as a template escape.
    head_end = match.end(1)
    return column_name[:head_end].lower() + column_name[head_end:]


def preprocess_columns(content: Dict[str, List[Any]]) -> None:
    """
    Normalize the casing of every column name in `content`, in place.

    Each key of each row is passed through `clean_column_name` and the row
    is re-keyed with the result. Entries of `content` that are not lists,
    and list items that are not dicts (for example a list of translation
    names), carry no column names and are left untouched.

    Args:
        content: mapping of sheet name to sheet data; mutated in place.
    """
    # One memo shared by all sheets, so a header repeated on every row is
    # only cleaned once.
    seen = {}
    for rows in content.values():
        if not isinstance(rows, list):
            continue
        for row in rows:
            if not isinstance(row, dict):
                continue
            # Snapshot the items first: the row is mutated while renaming.
            for column_name, value in list(row.items()):
                cleaned_name = clean_column_name(column_name, seen)
                del row[column_name]
                row[cleaned_name] = value

def expand_content_in_place(content: Dict[str, List[Any]]) -> None:
preprocess_columns(content)

specials, translations, transl_cols = _get_special_survey_cols(content)

if len(translations) > 0:
29 changes: 28 additions & 1 deletion tests/test_expand_content.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
# coding: utf-8
import copy
from collections import OrderedDict
from ddt import data, ddt, unpack
from unittest import TestCase

from formpack import FormPack
from formpack.constants import OR_OTHER_COLUMN as _OR_OTHER
from formpack.constants import UNTRANSLATED
from formpack.utils.expand_content import SCHEMA_VERSION
from formpack.utils.expand_content import SCHEMA_VERSION, clean_column_name
from formpack.utils.expand_content import _expand_tags
from formpack.utils.expand_content import _get_special_survey_cols
from formpack.utils.expand_content import expand_content, _expand_type_to_dict
from formpack.utils.flatten_content import flatten_content
from formpack.utils.string import orderable_with_none



def test_expand_selects_with_or_other():
assert _expand_type_to_dict('select_one xx or other').get(_OR_OTHER) == True
assert _expand_type_to_dict('select_one xx or_other').get(_OR_OTHER) == True
@@ -604,5 +607,29 @@ def test_expand_translations_null_lang():
assert s1 == s1_copy


def test_expand_ignores_case():
    """A capitalized `Label` header is treated like `label` when expanding."""
    s1 = {'survey': [{'type': 'text', 'Label': 'hi'}]}
    expand_content(s1, in_place=True)
    # Column names are lower-cased before translated columns are detected,
    # so the normalized name is what gets reported.
    assert s1.get('translated') == ['label']


def _s(rows):
return {'survey': [dict([[key, 'x']]) for key in rows]}

@ddt
class ColumnTestCase(TestCase):
    """Checks that `clean_column_name` lower-cases only the column prefix."""

    @data(
        ('FOO', 'foo'),
        ('LABEL', 'label'),
        ('HINT', 'hint'),
        ('BIND::FOO', 'bind::FOO'),
        ('BODY : FOO', 'body : FOO'),
        ('MEDIA:AUDIO:Spanish', 'media:audio:Spanish'),
        ('VIDEO :: SPANISH', 'video :: SPANISH'),
        ('MEDIA:AUDIO', 'media:audio'),
        ('IMAGE', 'image'),
        ('LABEL : SPANISH', 'label : SPANISH'),
    )
    @unpack
    def test_clean_column_name(self, name, expected):
        # `clean_column_name` takes a memo dict as its second argument; a
        # fresh one per case keeps the cases independent of each other.
        assert clean_column_name(name, {}) == expected