From a3f2849caff3616ae6d6847a601e8f6df60c207e Mon Sep 17 00:00:00 2001 From: Joxit Date: Wed, 15 May 2019 19:03:48 +0200 Subject: [PATCH] feat(multi-lang): Add index for all wof languages (when different from the default language) --- src/components/extractFields.js | 23 ++++++++++++++++++++++- src/peliasDocGenerators.js | 15 +++++++++++---- test/components/extractFieldsTest.js | 4 ++++ test/readStreamTest.js | 3 +++ 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/components/extractFields.js b/src/components/extractFields.js index e1c4dd89..ada10a69 100644 --- a/src/components/extractFields.js +++ b/src/components/extractFields.js @@ -1,6 +1,7 @@ const through2 = require('through2'); const _ = require('lodash'); const util = require('util'); +const iso3166 = require('iso3166-1'); // hierarchy in importance-descending order of population fields const population_hierarchy = [ @@ -26,6 +27,8 @@ const NAME_ALIAS_FIELDS = [ 'label:%s_x_preferred' ]; +const wofNamesRegex = /name:[a-z]{3}_x_preferred/; + // this function is used to verify that a US county QS altname is available function isUsCounty(base_record, wof_country, qs_a2_alt) { return 'US' === wof_country && @@ -131,6 +134,22 @@ function getNameAliases(properties) { return concatArrayFields(properties, nameFields); } +function getMultiLangNames(defaultName, properties) { + return Object.keys(properties) + .filter(key => wofNamesRegex.test(key)) // get only name:.* keys + .map(key => { + return { + key: key.substring(5, 8).toUpperCase(), // get the iso part of the key name:iso_x_preferred + value: properties[key] + .filter(name => !defaultName || defaultName.indexOf(name) < 0) // remove duplicate elements found in default name + }; + }) // + .filter(({ key, value }) => value.length > 0 && iso3166.is3(key)) // filter correct iso 3 keys + .reduce((langs, { key, value }) => + _.set(langs, iso3166.to2(key).toLowerCase(), value[0]), {} + ); // create the lang/value map +} + function 
getAbbreviation(properties) { if (properties['wof:placetype'] === 'country' && properties['wof:country']) { return properties['wof:country']; @@ -167,10 +186,12 @@ function getHierarchies(id, properties) { */ module.exports.create = function map_fields_stream() { return through2.obj(function(json_object, enc, callback) { + const default_names = getName(json_object.properties); var record = { id: json_object.id, - name: getName(json_object.properties), + name: default_names, name_aliases: getNameAliases(json_object.properties), + name_langs: getMultiLangNames(default_names, json_object.properties), abbreviation: getAbbreviation(json_object.properties), place_type: json_object.properties['wof:placetype'], lat: getLat(json_object.properties), diff --git a/src/peliasDocGenerators.js b/src/peliasDocGenerators.js index 1404b3ce..b3e3e141 100644 --- a/src/peliasDocGenerators.js +++ b/src/peliasDocGenerators.js @@ -73,10 +73,17 @@ function setupDocument(record, hierarchy) { } // index name aliases for all other records (where available) - else if (record.name_aliases.length) { - record.name_aliases.forEach(alias => { - wofDoc.setNameAlias('default', alias); - }); + else { + if (record.name_aliases.length) { + record.name_aliases.forEach(alias => { + wofDoc.setNameAlias('default', alias); + }); + } + if (record.name_langs) { + for (let lang in record.name_langs) { + wofDoc.setName(lang, record.name_langs[lang]); + } + } } } wofDoc.setCentroid({ lat: record.lat, lon: record.lon }); diff --git a/test/components/extractFieldsTest.js b/test/components/extractFieldsTest.js index 86ed100c..eeb0e2ff 100644 --- a/test/components/extractFieldsTest.js +++ b/test/components/extractFieldsTest.js @@ -54,6 +54,7 @@ tape('readStreamComponents', function(test) { id: 12345, name: 'name 1', name_aliases: [], + name_langs: {}, place_type: 'place type 1', lat: 12.121212, lon: 21.212121, @@ -92,6 +93,7 @@ tape('readStreamComponents', function(test) { id: 23456, name: undefined, name_aliases: 
[], + name_langs: {}, place_type: undefined, lat: undefined, lon: undefined, @@ -130,6 +132,7 @@ tape('readStreamComponents', function(test) { id: 12345, name: 'name 1', name_aliases: [], + name_langs: {}, place_type: 'place type 1', lat: 12.121212, lon: 21.212121, @@ -504,6 +507,7 @@ tape('readStreamComponents', function(test) { id: 12345, name: 'wof:name value', name_aliases: [], + name_langs: {}, place_type: 'country', lat: undefined, lon: undefined, diff --git a/test/readStreamTest.js b/test/readStreamTest.js index 2457dfe1..dcb477a7 100644 --- a/test/readStreamTest.js +++ b/test/readStreamTest.js @@ -79,6 +79,7 @@ tape('readStream', (test) => { id: 123, name: 'name 1', name_aliases: [], + name_langs: {}, place_type: 'place type 1', lat: 12.121212, lon: 21.212121, @@ -94,6 +95,7 @@ tape('readStream', (test) => { id: 456, name: 'name 2', name_aliases: [], + name_langs: {}, place_type: 'place type 2', lat: 13.131313, lon: 31.313131, @@ -234,6 +236,7 @@ tape('readStream', (test) => { id: 421302191, name: 'name 421302191', name_aliases: [], + name_langs: {}, abbreviation: undefined, place_type: undefined, lat: 45.240295,