Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add index for all WOF languages (when different from the default language) #446

Merged
merged 2 commits into from
May 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/components/extractFields.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const through2 = require('through2');
const _ = require('lodash');
const util = require('util');
const iso639 = require('../helpers/iso639');

// hierarchy in importance-descending order of population fields
const population_hierarchy = [
Expand All @@ -26,6 +27,8 @@ const NAME_ALIAS_FIELDS = [
'label:%s_x_preferred'
];

// Matches property keys that carry per-language names, for example
// 'name:eng_x_preferred' or 'label:fra_x_variant': a 'name' or 'label' prefix,
// a colon, a three-lowercase-letter language code, then '_x_preferred' or
// '_x_variant'.
// NOTE(review): the pattern is not anchored with ^ / $, so any key that merely
// contains such a sequence also matches — confirm this is intended.
const WOF_NAMES_REGEX = /(name|label):[a-z]{3}_x_(preferred|variant)/;

// this function is used to verify that a US county QS altname is available
function isUsCounty(base_record, wof_country, qs_a2_alt) {
return 'US' === wof_country &&
Expand Down Expand Up @@ -131,6 +134,23 @@ function getNameAliases(properties) {
return concatArrayFields(properties, nameFields);
}

/**
 * Collects the per-language names of a record, grouped by two-letter language code.
 *
 * Every property key accepted by WOF_NAMES_REGEX is inspected: the three
 * characters following the first ':' are looked up in the iso639 table, and
 * the property's names are kept only when they are not already found in
 * `defaultName`. Keys whose code is missing from iso639, or whose names are
 * all filtered out, contribute nothing. Names from several keys that resolve
 * to the same language are merged without duplicates.
 *
 * NOTE(review): names are compared with `defaultName.indexOf(name)`; if
 * `defaultName` is a string rather than an array this is a substring check —
 * confirm the type returned by the caller.
 *
 * @param {*} defaultName - default name(s) of the record; when falsy, nothing is filtered out
 * @param {Object} properties - record properties; matching keys are expected to hold arrays of names
 * @returns {Object} map of language code (value taken from iso639) to array of names
 */
function getMultiLangNames(defaultName, properties) {
  const namesByLang = {};

  for (const field of Object.keys(properties)) {
    // only the name:* / label:* language fields are of interest
    if (!WOF_NAMES_REGEX.test(field)) {
      continue;
    }

    // the three-letter code starts right after the first colon of the key
    const codeStart = field.indexOf(':') + 1;
    const iso3 = field.substring(codeStart, codeStart + 3);

    // drop names that are already present in the default name
    const freshNames = properties[field].filter(candidate => {
      return !defaultName || defaultName.indexOf(candidate) < 0;
    });

    // keep only non-empty lists whose code is known to the iso639 table
    if (freshNames.length > 0 && iso639[iso3]) {
      const lang = iso639[iso3];
      _.set(namesByLang, lang, _.union(namesByLang[lang], freshNames));
    }
  }

  return namesByLang;
}

function getAbbreviation(properties) {
if (properties['wof:placetype'] === 'country' && properties['wof:country']) {
return properties['wof:country'];
Expand Down Expand Up @@ -167,10 +187,12 @@ function getHierarchies(id, properties) {
*/
module.exports.create = function map_fields_stream() {
return through2.obj(function(json_object, enc, callback) {
const default_names = getName(json_object.properties);
var record = {
id: json_object.id,
name: getName(json_object.properties),
name: default_names,
name_aliases: getNameAliases(json_object.properties),
name_langs: getMultiLangNames(default_names, json_object.properties),
abbreviation: getAbbreviation(json_object.properties),
place_type: json_object.properties['wof:placetype'],
lat: getLat(json_object.properties),
Expand Down
207 changes: 207 additions & 0 deletions src/helpers/iso639.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Lookup table from three-letter language codes to two-letter language codes.
// Based on https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
//
// Keys are three-letter codes, values are the matching two-letter codes.
// Some languages are listed under two different three-letter keys that map to
// the same value (for example 'alb' and 'sqi' -> 'sq', 'fra' and 'fre' -> 'fr'),
// presumably the alternative three-letter forms given on the page above.
// Looking up a code that is not listed here yields undefined.
module.exports = {
'abk': 'ab',
'aar': 'aa',
'afr': 'af',
'aka': 'ak',
'alb': 'sq',
'sqi': 'sq',
'amh': 'am',
'ara': 'ar',
'arg': 'an',
'hye': 'hy',
'arm': 'hy',
'asm': 'as',
'ava': 'av',
'ave': 'ae',
'aym': 'ay',
'aze': 'az',
'bam': 'bm',
'bak': 'ba',
'eus': 'eu',
'baq': 'eu',
'bel': 'be',
'ben': 'bn',
'bih': 'bh',
'bis': 'bi',
'bos': 'bs',
'bre': 'br',
'bul': 'bg',
'mya': 'my',
'bur': 'my',
'cat': 'ca',
'cha': 'ch',
'che': 'ce',
'nya': 'ny',
'chi': 'zh',
'zho': 'zh',
'chv': 'cv',
'cor': 'kw',
'cos': 'co',
'cre': 'cr',
'hrv': 'hr',
'ces': 'cs',
'cze': 'cs',
'dan': 'da',
'div': 'dv',
'nld': 'nl',
'dut': 'nl',
'dzo': 'dz',
'eng': 'en',
'epo': 'eo',
'est': 'et',
'ewe': 'ee',
'fao': 'fo',
'fij': 'fj',
'fin': 'fi',
'fra': 'fr',
'fre': 'fr',
'ful': 'ff',
'glg': 'gl',
'kat': 'ka',
'geo': 'ka',
'deu': 'de',
'ger': 'de',
'ell': 'el',
'gre': 'el',
'grn': 'gn',
'guj': 'gu',
'hat': 'ht',
'hau': 'ha',
'heb': 'he',
'her': 'hz',
'hin': 'hi',
'hmo': 'ho',
'hun': 'hu',
'ina': 'ia',
'ind': 'id',
'ile': 'ie',
'gle': 'ga',
'ibo': 'ig',
'ipk': 'ik',
'ido': 'io',
'isl': 'is',
'ice': 'is',
'ita': 'it',
'iku': 'iu',
'jpn': 'ja',
'jav': 'jv',
'kal': 'kl',
'kan': 'kn',
'kau': 'kr',
'kas': 'ks',
'kaz': 'kk',
'khm': 'km',
'kik': 'ki',
'kin': 'rw',
'kir': 'ky',
'kom': 'kv',
'kon': 'kg',
'kor': 'ko',
'kur': 'ku',
'kua': 'kj',
'lat': 'la',
'ltz': 'lb',
'lug': 'lg',
'lim': 'li',
'lin': 'ln',
'lao': 'lo',
'lit': 'lt',
'lub': 'lu',
'lav': 'lv',
'glv': 'gv',
'mkd': 'mk',
'mac': 'mk',
'mlg': 'mg',
'may': 'ms',
'msa': 'ms',
'mal': 'ml',
'mlt': 'mt',
'mri': 'mi',
'mao': 'mi',
'mar': 'mr',
'mah': 'mh',
'mon': 'mn',
'nau': 'na',
'nav': 'nv',
'nde': 'nd',
'nep': 'ne',
'ndo': 'ng',
'nob': 'nb',
'nno': 'nn',
'nor': 'no',
'iii': 'ii',
'nbl': 'nr',
'oci': 'oc',
'oji': 'oj',
'chu': 'cu',
'orm': 'om',
'ori': 'or',
'oss': 'os',
'pan': 'pa',
'pli': 'pi',
'per': 'fa',
'fas': 'fa',
'pol': 'pl',
'pus': 'ps',
'por': 'pt',
'que': 'qu',
'roh': 'rm',
'run': 'rn',
'ron': 'ro',
'rum': 'ro',
'rus': 'ru',
'san': 'sa',
'srd': 'sc',
'snd': 'sd',
'sme': 'se',
'smo': 'sm',
'sag': 'sg',
'srp': 'sr',
'gla': 'gd',
'sna': 'sn',
'sin': 'si',
'slk': 'sk',
'slo': 'sk',
'slv': 'sl',
'som': 'so',
'sot': 'st',
'spa': 'es',
'sun': 'su',
'swa': 'sw',
'ssw': 'ss',
'swe': 'sv',
'tam': 'ta',
'tel': 'te',
'tgk': 'tg',
'tha': 'th',
'tir': 'ti',
'bod': 'bo',
'tib': 'bo',
'tuk': 'tk',
'tgl': 'tl',
'tsn': 'tn',
'ton': 'to',
'tur': 'tr',
'tso': 'ts',
'tat': 'tt',
'twi': 'tw',
'tah': 'ty',
'uig': 'ug',
'ukr': 'uk',
'urd': 'ur',
'uzb': 'uz',
'ven': 've',
'vie': 'vi',
'vol': 'vo',
'wln': 'wa',
'cym': 'cy',
'wel': 'cy',
'wol': 'wo',
'fry': 'fy',
'xho': 'xh',
'yid': 'yi',
'yor': 'yo',
'zha': 'za',
'zul': 'zu'
};
25 changes: 21 additions & 4 deletions src/peliasDocGenerators.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ function assignField(hierarchyElement, wofDoc) {

}

/**
 * Copies per-language names onto a document.
 *
 * For each language in `name_langs`, the first entry of its list is passed to
 * `wofDoc.setName` and every following entry to `wofDoc.setNameAlias`.
 * Languages with an empty list result in no calls; a missing `name_langs`
 * results in no calls at all.
 *
 * @param {Object} wofDoc - document exposing setName(lang, value) and setNameAlias(lang, value)
 * @param {Object} name_langs - map of language code to list of names
 */
function addMultiLangAliases(wofDoc, name_langs) {
  for (const lang in name_langs) {
    const names = name_langs[lang];

    for (let idx = 0; idx < names.length; idx += 1) {
      // everything after the leading entry is registered as an alias
      if (idx > 0) {
        wofDoc.setNameAlias(lang, names[idx]);
        continue;
      }

      wofDoc.setName(lang, names[idx]);
    }
  }
}

// method that extracts the logic for Document creation. `hierarchy` is optional
function setupDocument(record, hierarchy) {
var wofDoc = new Document( 'whosonfirst', record.place_type, record.id );
Expand All @@ -73,10 +85,15 @@ function setupDocument(record, hierarchy) {
}

// index name aliases for all other records (where available)
else if (record.name_aliases.length) {
record.name_aliases.forEach(alias => {
wofDoc.setNameAlias('default', alias);
});
else {
if (record.name_aliases.length) {
record.name_aliases.forEach(alias => {
wofDoc.setNameAlias('default', alias);
});
}
if (record.name_langs) {
addMultiLangAliases(wofDoc, record.name_langs);
}
}
}
wofDoc.setCentroid({ lat: record.lat, lon: record.lon });
Expand Down
Loading