From 2135ecc8c2e9422e60c4d29b88f4971cb72608a5 Mon Sep 17 00:00:00 2001 From: Oskar Nyberg Date: Fri, 10 May 2024 14:29:56 +0200 Subject: [PATCH] Remove map related code in geo data scripts --- gui/scripts/extract-geo-data.py | 473 ------------------ gui/scripts/fetch-relay-locations.py | 126 +++++ ...to-app.py => integrate-relay-locations.py} | 54 -- gui/scripts/requirements.txt | 17 - scripts/localization | 20 +- 5 files changed, 128 insertions(+), 562 deletions(-) delete mode 100755 gui/scripts/extract-geo-data.py create mode 100755 gui/scripts/fetch-relay-locations.py rename gui/scripts/{integrate-into-app.py => integrate-relay-locations.py} (61%) diff --git a/gui/scripts/extract-geo-data.py b/gui/scripts/extract-geo-data.py deleted file mode 100755 index 7c6479f2a8fd..000000000000 --- a/gui/scripts/extract-geo-data.py +++ /dev/null @@ -1,473 +0,0 @@ -#!/usr/bin/env python3 -""" -This module forms a geo json of highly populated cities in the world -""" - -import os -from os import path -import json -import urllib.request -from subprocess import Popen, PIPE -from polib import POFile, POEntry -import colorful as c -from terminaltables import AsciiTable - -import fiona -# import order is important, see https://github.com/Toblerity/Shapely/issues/553 -from shapely.geometry import shape, mapping - -SCRIPT_DIR = path.dirname(path.realpath(__file__)) - -# The directory with the existing localizations content -LOCALE_DIR = path.normpath(path.join(SCRIPT_DIR, "../locales")) - -# The output directory for the generated content -OUT_DIR = path.join(SCRIPT_DIR, "out") - -# the directory with the generated localizations content -LOCALE_OUT_DIR = path.join(OUT_DIR, "locales") - -# Relay locations gettext catalogue template filename (.pot) -RELAY_LOCATIONS_POT_FILENAME = "relay-locations.pot" - -# Relay locations gettext catalogue filename (.po) -RELAY_LOCATIONS_PO_FILENAME = "relay-locations.po" - -# Custom locale mapping between the identifiers in the app and Natural Earth datasets -LOCALE_MAPPING = { - # "zh" in Natural Earth Data refers to simplified chinese - "zh-CN": "zh" -} - - -def extract_geometry(): - input_path = get_shape_path("ne_50m_admin_0_countries") - output_path = path.join(OUT_DIR, "geometry.json") - - features = [] - with fiona.open(input_path) as source: - for feat in source: - del feat["properties"] - geometry = feat["geometry"] - feat["bbox"] = shape(geometry).bounds - features.append(feat) - - my_layer = { - "type": "FeatureCollection", - "features": features - } - - with Popen( - ['geo2topo', '-q', '5e3', 'geometry=-', '-o', output_path], - stdin=PIPE, stdout=PIPE, stderr=PIPE - ) as subproc: - errors = subproc.communicate(input=json.dumps(my_layer).encode())[1] - if subproc.returncode == 0: - print(c.green("Extracted data to {}".format(output_path))) - else: - print(c.red("geo2topo exited with {}. {}".format(subproc.returncode, errors.decode().strip()))) - - -def extract_provinces_and_states_lines(): - input_path = get_shape_path("ne_50m_admin_1_states_provinces_lines") - output_path = path.join(OUT_DIR, "states-provinces-lines.json") - - features = [] - with fiona.open(input_path) as source: - for feat in source: - del feat["properties"] - geometry = feat["geometry"] - feat["bbox"] = shape(geometry).bounds - features.append(feat) - - my_layer = { - "type": "FeatureCollection", - "features": features - } - - with Popen( - ['geo2topo', '-q', '5e3', 'geometry=-', '-o', output_path], - stdin=PIPE, stdout=PIPE, stderr=PIPE - ) as subproc: - errors = subproc.communicate(input=json.dumps(my_layer).encode())[1] - if subproc.returncode == 0: - print(c.green("Extracted data to {}".format(output_path))) - else: - print(c.red("geo2topo exited with {}. {}".format(subproc.returncode, errors.decode().strip()))) - - -def sort_pofile_entries(pofile): - pofile.sort(key=lambda o: o.msgid_with_context) - - -def extract_relay_translations(): - try: - response = request_relays() - except Exception as e: - print(c.red("Failed to fetch the relays list: {}".format(e))) - raise - - locations = response.get("locations") - countries = structure_locations(locations) - - extract_relay_locations_pot(countries) - translate_relay_locations(countries) - - -def structure_locations(locations): - countries = {} - - for location_key in locations: - location = locations.get(location_key) - country_name = location.get("country") - city_name = location.get("city") - - if not "-" in location_key: - print("Location key incorrectly formatted: {}".format(location_key)) - continue - - country_code, city_code = location_key.split("-") - - if country_name is None: - print("Country name missing for {}".format(location_key)) - continue - - if city_name is None: - print("City name missing for {}".format(location_key)) - continue - - if country_code not in countries: - countries[country_code] = {"name": country_name, "cities": {}} - - country = countries[country_code] - cities = country["cities"] - if location_key != "se-bet": - if city_code not in cities: - cities[city_code] = city_name - else: - print("There are multiple entries for {} in {}".format(city_name, country_name)) - - return countries - - -def extract_relay_locations_pot(countries): - pot = POFile(encoding='utf-8', check_for_duplicates=True) - pot.metadata = {"Content-Type": "text/plain; charset=utf-8"} - output_path = path.join(LOCALE_OUT_DIR, RELAY_LOCATIONS_POT_FILENAME) - - print("Generating {}".format(output_path)) - - for country_code in countries: - country = countries[country_code] - entry = POEntry( - msgid=country["name"], - msgstr="", - comment=country_code.upper() - ) - pot.append(entry) - - cities = country["cities"] - for city_code in cities: - entry = POEntry( - msgid=cities[city_code], - msgstr="", - comment="{} {}".format(country_code.upper(), city_code.upper()) - ) - - try: - pot.append(entry) - except ValueError as err: - print(c.orange("Cannot add an entry: {}".format(err))) - - pot.save(output_path) - - -def prepare_stats_table_column(item): - (locale, hits, misses) = item - total = hits + misses - hits_ratio = round(float(hits) / total * 100, 2) if total > 0 else 0 - - misses_column = c.orange(str(misses)) if misses > 0 else c.green(str(misses)) - hits_column = c.green(str(hits)) - ratio_column = c.green(str(hits_ratio) + "%") if hits_ratio >= 80 else c.orange(str(hits_ratio)) - total_column = str(total) - - return (locale, hits_column, misses_column, ratio_column, total_column) - -def print_stats_table(title, data): - header = ("Locale", "Hits", "Misses", "% translated", "Total") - color_data = list(map(prepare_stats_table_column, data)) - - table = AsciiTable([header] + color_data) - table.title = title - - for i in range(1, 5): - table.justify_columns[i] = 'center' - - print("") - print(table.table) - print("") - - -def translate_relay_locations(countries): - """ - A helper function to generate the relay-locations.po with automatic translations for each - corresponding locale. - - The `countries` argument is an array that's contained within the "countries" key of the - relay location list. - """ - - country_translator = CountryTranslator() - city_translator = CityTranslator() - stats = [] - - for locale in os.listdir(LOCALE_DIR): - locale_dir = path.join(LOCALE_DIR, locale) - if path.isdir(locale_dir): - print("Generating {}".format(path.join(locale, RELAY_LOCATIONS_PO_FILENAME))) - (hits, misses) = translate_single_relay_locations(country_translator, city_translator, countries, locale) - stats.append((locale, hits, misses)) - - print_stats_table("Relay location translations", stats) - - -def translate_single_relay_locations(country_translator, city_translator, countries, locale): - """ - A helper function to generate the relay-locations.po for the given locale. - - The `countries` argument is an array value that's contained within the "countries" key of the - relay location list. - """ - - po = POFile(encoding='utf-8', check_for_duplicates=True) - po.metadata = {"Content-Type": "text/plain; charset=utf-8"} - locale_out_dir = path.join(LOCALE_OUT_DIR, locale) - output_path = path.join(locale_out_dir, RELAY_LOCATIONS_PO_FILENAME) - - hits = 0 - misses = 0 - - if not path.exists(locale_out_dir): - os.makedirs(locale_out_dir) - - for country_code in countries: - country = countries[country_code] - country_name = country["name"] - - translated_country_name = country_translator.translate(locale, country_code) - # Default to empty string if no translation was found - if translated_country_name is not None: - hits += 1 - else: - translated_country_name = "" - misses += 1 - - # translate country - entry = POEntry( - msgid=country_name, - msgstr=translated_country_name, - comment=country_code.upper() - ) - po.append(entry) - - # translate cities - cities = country["cities"] - for city_code in cities: - city_name = cities[city_code] - - # Make sure to append the US state back to the translated name of the city - if country_code == "us": - split = city_name.rsplit(",", 2) - translated_name = city_translator.translate(locale, split[0].strip()) - - if translated_name is not None and len(split) > 1: - translated_name = "{}, {}".format(translated_name, split[1].strip()) - else: - translated_name = city_translator.translate(locale, city_name) - - # Default to empty string if no translation was found - found_translation = translated_name is not None - if found_translation: - hits += 1 - else: - translated_name = "" - misses += 1 - - entry = POEntry( - msgid=city_name, - msgstr=translated_name, - comment="{} {}".format(country_code.upper(), city_code.upper()) - ) - - try: - po.append(entry) - except ValueError as err: - print(c.orange("Cannot add an entry: {}".format(err))) - - po.save(output_path) - - return (hits, misses) - - -### HELPERS ### - -class CountryTranslator: - """ - This class provides facilities for translating countries - """ - - def __init__(self): - self.dataset = self.__build_index() - - def translate(self, locale, iso_a2): - """ - Lookup the countries dataset for the country matching by ISO A2 code - - When there is a match, the function looks for the translation using the given locale or using - the language component of it. - - Returns None when either there is no match or there is no translation for the matched city. - """ - props = self.dataset.get(iso_a2.upper()) - - if props is not None: - name_key = "name_" + map_locale(locale) - return props.get(name_key) - - return None - - - def __build_index(self): - """ - Private helper to build the index for the geo dataset, that can be used to speed up the - translations lookup. - """ - shape_path = get_shape_path("ne_50m_admin_0_countries") - dataset = dict() - - # build a hash map of the entire datasource in memory - with fiona.open(shape_path, "r") as source: - for feat in source: - props = lower_dict_keys(feat["properties"]) - - iso_a2 = props.get("iso_a2") - if iso_a2 is not None: - dataset[iso_a2.upper()] = props - - return dataset - - -class CityTranslator: - """ - This class provides facilities for translating places from English. - """ - - def __init__(self): - self.dataset = self.__build_index() - - def translate(self, locale, english_name): - """ - Lookup the populated places dataset for the city matching by name, par name or - name representation in ASCII. - - When there is a match, the function looks for the translation using the given locale or using - the language component of it. - - Returns None when either there is no match or there is no translation for the matched city. - """ - props = self.dataset.get(english_name) - - if props is not None: - name_key = "name_" + map_locale(locale) - return props.get(name_key) - - return None - - def __build_index(self): - """ - Private helper to build the index for the geo dataset, that can be used to speed up the - translations lookup. - """ - shape_path = get_shape_path("ne_10m_populated_places") - dataset = dict() - - # build a hash map of the entire datasource in memory - with fiona.open(shape_path, "r") as source: - for feat in source: - props = lower_dict_keys(feat["properties"]) - - name = props.get("name") - - # namepar works for "Wien" - namepar = props.get("namepar") - - # use nameascii to match "Sao Paolo" - nameascii = props.get("nameascii") - - if name is not None: - dataset[name] = props - - if namepar is not None: - dataset[namepar] = props - - if nameascii is not None: - dataset[nameascii] = props - - return dataset - - -def get_shape_path(dataset_name): - return path.join(SCRIPT_DIR, dataset_name, dataset_name + ".shp") - - -def lower_dict_keys(input_dict): - return dict((k.lower(), v) for k, v in input_dict.items()) - - -def convert_locale_ident(locale_ident): - """ - Return the locale identifie converting dashes to underscores. - - Example: en-US becomes en_US - """ - return locale_ident.replace("-", "_") - - -def map_locale(locale_ident): - """ - Map the locale in Natural Earth Data with the locale in the app and Crowdin - """ - if locale_ident in LOCALE_MAPPING: - locale_override = LOCALE_MAPPING[locale_ident] - else: - locale_override = locale_ident - - return convert_locale_ident(locale_override) - - -def request_relays(): - request = urllib.request.Request("https://api.mullvad.net/app/v1/relays") - with urllib.request.urlopen(request) as connection: - return json.load(connection) - - -# Program main() - -def main(): - # ensure output path exists - if not path.exists(OUT_DIR): - os.makedirs(OUT_DIR) - - # ensure locales output path exists - if not path.exists(LOCALE_OUT_DIR): - os.makedirs(LOCALE_OUT_DIR) - - # extract geo data - extract_geometry() - extract_provinces_and_states_lines() - - # extract translations - extract_relay_translations() - -main() diff --git a/gui/scripts/fetch-relay-locations.py b/gui/scripts/fetch-relay-locations.py new file mode 100755 index 000000000000..eeea51d56d48 --- /dev/null +++ b/gui/scripts/fetch-relay-locations.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +""" +This module adds relay location city and country names to relay-locations.pot +""" + +import os +from os import path +import json +import urllib.request +from polib import POFile, POEntry +import colorful as c + +SCRIPT_DIR = path.dirname(path.realpath(__file__)) + +# The output directory for the generated content +OUT_DIR = path.join(SCRIPT_DIR, "out") + +# the directory with the generated localizations content +LOCALE_OUT_DIR = path.join(OUT_DIR, "locales") + +# Relay locations gettext catalogue template filename (.pot) +RELAY_LOCATIONS_POT_FILENAME = "relay-locations.pot" + + +def extract_relay_translations(): + try: + response = request_relays() + except Exception as e: + print(c.red("Failed to fetch the relays list: {}".format(e))) + raise + + locations = response.get("locations") + countries = structure_locations(locations) + + extract_relay_locations_pot(countries) + + +def structure_locations(locations): + countries = {} + + for location_key in locations: + location = locations.get(location_key) + country_name = location.get("country") + city_name = location.get("city") + + if not "-" in location_key: + print("Location key incorrectly formatted: {}".format(location_key)) + continue + + country_code, city_code = location_key.split("-") + + if country_name is None: + print("Country name missing for {}".format(location_key)) + continue + + if city_name is None: + print("City name missing for {}".format(location_key)) + continue + + if country_code not in countries: + countries[country_code] = {"name": country_name, "cities": {}} + + country = countries[country_code] + cities = country["cities"] + if location_key != "se-bet": + if city_code not in cities: + cities[city_code] = city_name + else: + print("There are multiple entries for {} in {}".format(city_name, country_name)) + + return countries + + +def extract_relay_locations_pot(countries): + pot = POFile(encoding='utf-8', check_for_duplicates=True) + pot.metadata = {"Content-Type": "text/plain; charset=utf-8"} + output_path = path.join(LOCALE_OUT_DIR, RELAY_LOCATIONS_POT_FILENAME) + + print("Generating {}".format(output_path)) + + for country_code in countries: + country = countries[country_code] + entry = POEntry( + msgid=country["name"], + msgstr="", + comment=country_code.upper() + ) + pot.append(entry) + + cities = country["cities"] + for city_code in cities: + entry = POEntry( + msgid=cities[city_code], + msgstr="", + comment="{} {}".format(country_code.upper(), city_code.upper()) + ) + + try: + pot.append(entry) + except ValueError as err: + print(c.orange("Cannot add an entry: {}".format(err))) + + pot.save(output_path) + + +def request_relays(): + request = urllib.request.Request("https://api.mullvad.net/app/v1/relays") + with urllib.request.urlopen(request) as connection: + return json.load(connection) + + +# Program main() + +def main(): + # ensure output path exists + if not path.exists(OUT_DIR): + os.makedirs(OUT_DIR) + + # ensure locales output path exists + if not path.exists(LOCALE_OUT_DIR): + os.makedirs(LOCALE_OUT_DIR) + + # extract translations + extract_relay_translations() + +main() diff --git a/gui/scripts/integrate-into-app.py b/gui/scripts/integrate-relay-locations.py similarity index 61% rename from gui/scripts/integrate-into-app.py rename to gui/scripts/integrate-relay-locations.py index 9cba7e8dd264..e99b6a211e47 100644 --- a/gui/scripts/integrate-into-app.py +++ b/gui/scripts/integrate-relay-locations.py @@ -21,36 +21,9 @@ # The directory with the generated localizations content GENERATED_TRANSLATIONS_PATH = path.join(GENERATED_CONTENT_OUTPUT_PATH, "locales") -# The directory with the app's geo assets -APP_GEO_ASSETS_PATH = path.realpath(path.join(SCRIPT_DIR, "../assets/geo")) - # The directory with the existing app localizations APP_TRANSLATIONS_PATH = path.realpath(path.join(SCRIPT_DIR, "../locales")) -# Geo assets for copying from generated content folder into the app folder -GEO_ASSETS_TO_COPY = [ - "geometry.json", - "geometry.rbush.json", - "states-provinces-lines.json", - "states-provinces-lines.rbush.json", -] - -# The filenames of gettext catalogues that should be merged using msgcat -TRANSLATIONS_TO_MERGE = [ - "relay-locations.po" -] - - -def copy_geo_assets(): - for f in GEO_ASSETS_TO_COPY: - src = path.join(GENERATED_CONTENT_OUTPUT_PATH, f) - dst = path.join(APP_GEO_ASSETS_PATH, f) - - print("Copying {} to {}".format(src, dst)) - - shutil.copyfile(src, dst) - - def merge_relay_locations_catalogue_template(): existing_pot_file = path.join(APP_TRANSLATIONS_PATH, RELAY_LOCATIONS_POT_FILENAME) generated_pot_file = path.join(GENERATED_TRANSLATIONS_PATH, RELAY_LOCATIONS_POT_FILENAME) @@ -58,28 +31,6 @@ def merge_relay_locations_catalogue_template(): merge_gettext_catalogues(existing_pot_file, generated_pot_file) -def copy_and_merge_translations(): - for f in os.listdir(GENERATED_TRANSLATIONS_PATH): - src = path.join(GENERATED_TRANSLATIONS_PATH, f) - dst = path.join(APP_TRANSLATIONS_PATH, f) - - if path.isdir(src): - merge_single_locale_folder(src, dst) - - -def merge_single_locale_folder(src, dst): - for f in os.listdir(src): - src_po = path.join(src, f) - dst_po = path.join(dst, f) - - if f in TRANSLATIONS_TO_MERGE: - # merge ../locales/*/file.po with ./out/locales/*/file.po - # use existing translation to resolve conflicts - merge_gettext_catalogues(dst_po, src_po) - else: - print(c.orange("Unexpected file: {}".format(src_po))) - - def merge_gettext_catalogues(existing_catalogue_file, generated_catalogue_file): if path.exists(existing_catalogue_file): args = ( @@ -120,11 +71,6 @@ def run_program(*args): # Program main() def main(): - if not path.exists(APP_GEO_ASSETS_PATH): - os.makedirs(APP_GEO_ASSETS_PATH) - - copy_geo_assets() merge_relay_locations_catalogue_template() - copy_and_merge_translations() main() diff --git a/gui/scripts/requirements.txt b/gui/scripts/requirements.txt index 1b81d6340842..4a09a5bf342a 100644 --- a/gui/scripts/requirements.txt +++ b/gui/scripts/requirements.txt @@ -1,30 +1,13 @@ -Fiona==1.8.13.post1 \ - --hash=sha256:1a432bf9fd56f089256c010da009c90d4a795c531a848132c965052185336600 \ - --hash=sha256:79c3b80e00c9d055d20aead5d74319f54cdd1384e0d9e1a9e67446da2d74d89c \ - --hash=sha256:923a64bded457adee795b4f926b8cbb87d58bbafaabded77bc1d47abb2bba5c6 -Shapely==1.7.1 \ - --hash=sha256:1641724c1055459a7e2b8bbe47ba25bdc89554582e62aec23cb3f3ca25f9b129 \ - --hash=sha256:182716ffb500d114b5d1b75d7fd9d14b7d3414cef3c38c0490534cc9ce20981a \ - --hash=sha256:35be1c5d869966569d3dfd4ec31832d7c780e9df760e1fe52131105685941891 \ - --hash=sha256:4f3c59f6dbf86a9fc293546de492f5e07344e045f9333f3a753f2dda903c45d1 \ - --hash=sha256:6871acba8fbe744efa4f9f34e726d070bfbf9bffb356a8f6d64557846324232b polib==1.1.0 \ --hash=sha256:93b730477c16380c9a96726c54016822ff81acfa553977fdd131f2b90ba858d7 \ --hash=sha256:fad87d13696127ffb27ea0882d6182f1a9cf8a5e2b37a587751166c51e5a332a colorful==0.5.4 \ --hash=sha256:86848ad4e2eda60cd2519d8698945d22f6f6551e23e95f3f14dfbb60997807ea \ --hash=sha256:8d264b52a39aae4c0ba3e2a46afbaec81b0559a99be0d2cfe2aba4cf94531348 -terminaltables==3.1.0 \ - --hash=sha256:f3eb0eb92e3833972ac36796293ca0906e998dc3be91fbe1f8615b331b853b81 ## The following requirements were added by pip freeze: attrs==19.3.0 \ --hash=sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c \ --hash=sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72 -cairocffi==1.1.0 \ - --hash=sha256:f1c0c5878f74ac9ccb5d48b2601fcc75390c881ce476e79f4cfedd288b1b05db -CairoSVG==2.5.1 \ - --hash=sha256:f1ff02625520493eafb5695d987f69544555524bb0f95695b9ddd3f9dc7d29d5 \ - --hash=sha256:bfa0deea7fa0b9b2f29e41b747a915c249dbca731a4667c2917e47ff96e773e0 cffi==1.14.0 \ --hash=sha256:14491a910663bf9f13ddf2bc8f60562d6bc5315c1f09c704937ef17293fb85b0 \ --hash=sha256:2089ed025da3919d2e75a4d963d008330c96751127dd6f73c8dc0c65041b4c26 \ diff --git a/scripts/localization b/scripts/localization index f1a8663acc70..0551091e609b 100755 --- a/scripts/localization +++ b/scripts/localization @@ -42,26 +42,10 @@ function sync_localizations { function update_relay_locations_pot { log_header "Retrieving relay locations from server list and translating by using map data" pushd ../gui/scripts - # Download geo data - curl -L -O https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_0_countries.zip - curl -L -O https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces_lines.zip - curl -L -O https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip - - unzip ne_50m_admin_0_countries.zip -d ne_50m_admin_0_countries/ - unzip ne_50m_admin_1_states_provinces_lines.zip -d ne_50m_admin_1_states_provinces_lines/ - unzip ne_10m_populated_places.zip -d ne_10m_populated_places/ # Add translations from geo data - python3 extract-geo-data.py - python3 integrate-into-app.py - - # Remove geo data - rm ne_10m_populated_places.zip \ - ne_50m_admin_0_countries.zip \ - ne_50m_admin_1_states_provinces_lines.zip - rm -r ne_10m_populated_places ne_50m_admin_0_countries ne_50m_admin_1_states_provinces_lines - - git restore ../assets + python3 fetch-relay-locations.py + python3 integrate-relay-locations.py popd }