diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1167191a..564dfd7d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -2,5 +2,46 @@ include:
   - project: 'QubesOS/qubes-continuous-integration'
     file: '/gitlab-website.yml'
 
+stages:
+  - prepare
+  - build
+
 build:website:
   extends: .website
+
+update-transifex:
+  tags:
+    - docker
+  stage: prepare
+  rules:
+    - if: '$TX_TOKEN && $GITHUB_KEY'
+      when: always
+    - when: never
+  artifacts:
+    expire_in: 7 days
+    when: always
+    paths:
+      - site.tar.gz
+  variables:
+    GIT_SUBMODULE_STRATEGY: normal
+    GIT_AUTHOR_NAME: translation bot
+    GIT_AUTHOR_EMAIL: builder-bot@qubes-os.org
+    GIT_COMMITTER_NAME: translation bot
+    GIT_COMMITTER_EMAIL: builder-bot@qubes-os.org
+    PAGES_REPO_NWO: QubesOS/qubesos.github.io
+    TRANSLATED_LANGS: de fr es
+    LANG: C.UTF-8
+  before_script:
+    - mkdir -p $HOME/.ssh && echo "$GITHUB_KEY" > $HOME/.ssh/id_ed25519 && chmod 700 $HOME/.ssh/id_ed25519
+    - echo "github.com,140.82.121.4 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==" >> $HOME/.ssh/known_hosts
+    - export PATH=$PATH:$HOME/bin
+    - sudo dnf install -y python3-pycurl python3-PyYAML python3-jsonschema python3-certifi python3-attrs /usr/bin/bundle rubygem-jekyll rubygem-nokogiri rubygem-concurrent-ruby ruby-devel gcc-c++ transifex-client crudini python3-pyrsistent
+    - pip install python-frontmatter
+    - export NOKOGIRI_USE_SYSTEM_LIBRARIES=true
+    - gem install github-pages json html-proofer
+    - git submodule update --init
+  script:
+    - _utils/transifex-push
+    - _utils/transifex-pull $TRANSLATED_LANGS
+  after_script:
+    - tar czf site.tar.gz _site
diff --git a/.gitmodules b/.gitmodules
index 7b7703fa..aa2d7ac7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,6 @@
 [submodule "_hcl"]
 	path = _hcl
 	url = https://github.com/QubesOS/qubes-hcl
+[submodule "_translated"]
+	path = _translated
+	url = https://github.com/QubesOS/qubes-translated
diff --git a/README.md b/README.md
index 8d326fc6..10751e9d 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,68 @@ Please carefully read these guidelines before submitting a pull request.
 - [jQuery 1.7](http://api.jquery.com) - javascript helper library
 - [jQuery ToC MD Generator](https://github.com/dafi/tocmd-generator) - renders header menu on documentation section
 
+Translation
+------------
+
+Documentation translation is done using the Transifex platform: https://www.transifex.com/otf/qubes/
+The `_translated` directory should not be modified manually. Any manual change
+there **will be overridden** with the content downloaded from Transifex.
+
+The `qubes-translated` repository is not signed and generally should not be
+considered trusted for sensitive tasks. But the specific commit referenced from
+this repository is validated not to interfere with the English website.
+
+### Transifex integration details ###
+
+Most of the integration is automated. It is split into a few parts:
+
+1. The `_utils/transifex-push` script takes the source (English) content and
+   uploads it to Transifex. The platform merges existing translations into the new
+   files, so unchanged parts that were translated before remain translated.
+   The Transifex configuration is created from scratch here, to correctly handle
+   new/removed files.
+
+2. 
`_utils/transifex-pull` pulls the translated content and places it into the
+   `_translated` submodule. Then a set of scripts in
+   `_utils/_translation_utils` performs various post-processing steps,
+   including:
+   - validating the syntax of the retrieved files (whether the frontmatter is still correctly set, etc.)
+   - modifying frontmatter settings (`permalink`, `lang`, `redirect_from`, etc.) to
+     match the page language
+   - adjusting all internal links to point at pages in the same language
+   - running htmlproofer to verify that no broken links were introduced
+
+   At the end, the script commits and pushes the new content to the qubes-translated
+   repository.
+
+3. `_utils/update-translated` fetches the new version of the qubes-translated repo (its
+   master branch), verifies that no page tries to subvert the English version,
+   and, if all is fine, commits and pushes the updated submodule (similar to
+   the `_utils/update-submodules` script).
+
+Steps 1 and 2 run in the GitLab CI environment, without access to any
+signing key and with push access only to the qubes-translated repository. Step 3
+runs in a more trusted environment, with access to the signing key and
+push access to the main repository.
+
+### Language switcher ###
+
+The top-level `_config.yml` file contains the list of enabled languages. If
+more than one language is listed (not just `en`), each page will have a language switch menu in
+the top right corner. Only languages listed in `_config.yml` are visible in the
+switcher, but more may be available (accessing them requires manually
+changing the language code in the URL).
+
+Each markdown file in the repo has `lang` and `ref` attributes (in its
+frontmatter). The `lang` attribute contains the language of the file
+(it should always be `en` outside of the qubes-translated repository) and `ref`
+contains a unique identifier of the page. The language switcher logic uses the
+`ref` attribute to find all translations of a given page. This allows a translated
+page to have a different page name in the URL, although we do not use this right now.
+
+The `lang` and `ref` attributes are added with the
+`_utils/_translation_utils/prepare_for_translation.py` script, as illustrated
+below.
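+
+A minimal frontmatter sketch (the values are illustrative, not taken from a
+real page):
+
+    ---
+    permalink: /doc/installation-guide/
+    lang: en
+    ref: 131
+    ---
+
+The German counterpart of such a page in qubes-translated would keep
+`ref: 131`, but carry `lang: de` and `permalink: /de/doc/installation-guide/`.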
+
 Deprecated Documentation
 ------------------------
diff --git a/_config.yml b/_config.yml
index 9a724600..4d5640b0 100644
--- a/_config.yml
+++ b/_config.yml
@@ -21,6 +21,14 @@ relative_permalinks: false
 permalink: /news/:year/:month/:day/:title/
 excerpt_separator: ""
 
+# enabled languages
+# remember to create symlinks in _data/translation too
+languages:
+- en
+- de
+- fr
+- es
+
 plugins:
 - jekyll-redirect-from
 - jekyll-sitemap
@@ -30,7 +38,7 @@ collections:
     output: true
   hcl:
     output: false
-  qubes-translated:
+  translated:
     output: true
diff --git a/_data/includes.yml b/_data/includes.yml
index bce18dbc..bfc71c79 100644
--- a/_data/includes.yml
+++ b/_data/includes.yml
@@ -11,6 +11,8 @@
     icon: fa-code
   - text: Edit This Page
     icon: fa-code-fork
+  - text: Translate This Page
+    icon: fa-language
   - url: /security/
     text: Report a Security Issue
     icon: fa-lock
diff --git a/_data/translation/de b/_data/translation/de
new file mode 120000
index 00000000..a25f53a6
--- /dev/null
+++ b/_data/translation/de
@@ -0,0 +1 @@
+../../_translated/de/_data/de
\ No newline at end of file
diff --git a/_data/translation/es b/_data/translation/es
new file mode 120000
index 00000000..65518b27
--- /dev/null
+++ b/_data/translation/es
@@ -0,0 +1 @@
+../../_translated/es/_data/es
\ No newline at end of file
diff --git a/_data/translation/fr b/_data/translation/fr
new file mode 120000
index 00000000..68f25cda
--- /dev/null
+++ b/_data/translation/fr
@@ -0,0 +1 @@
+../../_translated/fr/_data/fr
\ No newline at end of file
diff --git a/_includes/doc-widget.html b/_includes/doc-widget.html
index da2253f0..f45dc741 100644
--- a/_includes/doc-widget.html
+++ b/_includes/doc-widget.html
@@ -5,9 +5,14 @@
 {% if page.lang == nil or page.lang == "en" %}
   {% assign master_edit = site.project_repo_path | append: "/qubes-doc/edit/master" | append: page.relative_path | remove_first: '_doc' %}
   {% assign master_blob = site.project_repo_path | append: "/qubes-doc/blob/master" | append: page.relative_path | remove_first: '_doc' %}
+  {% assign pagelang = "" %}
+  {% assign transifexresource = page.relative_path | replace: '_doc/', 'doc/' | remove: ".md" | replace: "/", "_" %}
 {% else %}
-  {% assign master_edit = site.project_repo_path | append: "/qubes-translated/edit/master" | append: page.relative_path | remove_first: '_qubes-translated' %}
-  {% assign master_blob = site.project_repo_path | append: "/qubes-translated/blob/master" | append: page.relative_path| remove_first: '_qubes-translated' %}
+  {% assign master_edit = site.project_repo_path | append: "/qubes-translated/edit/master" | append: page.relative_path | remove_first: '_translated' %}
+  {% assign master_blob = site.project_repo_path | append: "/qubes-translated/blob/master" | append: page.relative_path | remove_first: '_translated' %}
+  {% assign pagelang = page.lang %}
+  {% assign prefix = "_translated/" | append: pagelang | append: '/' %}
+  {% assign transifexresource = page.relative_path | remove_first: prefix | replace: '_doc/', 'doc/' | remove: ".md" | replace: "/", "_" %}
 {% endif %}
 
 {% for item in docs.links %}
@@ -19,7 +24,13 @@
     {% if item.icon == "fa-code" %}
       {% assign a_href = master_blob %}
     {% elsif item.icon == "fa-code-fork" %}
+      {% if pagelang != "" %}
+
+        {% continue %}
+      {% endif %}
       {% assign a_href = master_edit %}
+    {% elsif item.icon == "fa-language" %}
+      {% assign a_href = "https://www.transifex.com/otf/qubes/translate/#" | append: pagelang | append: "/" | append: transifexresource %}
     {% else %}
       {% assign a_href = item.url %}
     {% endif %}
diff --git
a/_includes/header.html b/_includes/header.html index 86f98f45..37e3521b 100644 --- a/_includes/header.html +++ b/_includes/header.html @@ -10,25 +10,37 @@ - - {% assign langmenu = false %} + + {% if site.languages.size > 1 %} + {% assign langmenu = true %} + {% else %} + {% assign langmenu = false %} + {% endif %} {% if page.layout == nil or page.collection == 'posts' %} {% assign langmenu = false %} {% endif %} {% if langmenu %} - {% assign posts = site.pages | concat: site.doc | concat: site.qubes-translated | where:'ref', page.ref | sort: 'lang' %} + + {% assign page_all_langs = site.pages | concat: site.doc | concat: site.translated | where:'ref', page.ref | sort: 'lang' %}
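+      {% comment %} page_all_langs now holds every page that shares this page's 'ref', i.e. one entry per available translation {% endcomment %}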
- {% for post in posts %} - {{ post.lang }} + {% for translated in page_all_langs %} + {% if site.languages contains translated.lang %} + {{ translated.lang }} + {% endif %} {% endfor %}
{% endif %} - + {% if page.lang == nil or page.lang == "" or page.lang == "en" %} + {% assign linktohome = "/" %} + {% else %} + {% assign linktohome = "/" | append: page.lang | append: "/" %} + {% endif %} + Qubes OS Project Qubes OS @@ -46,8 +58,8 @@
- {% for post in posts %} - {{ post.lang }} + {% for translated in page_all_langs %} + {{ translated.lang }} {% endfor %}
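Across both switcher menus above, the lookup reduces to the following pattern — a simplified sketch reusing the names from this diff, not an exact copy of the template; the real markup builds the link target from each translated page's own permalink:

    {% assign page_all_langs = site.pages | concat: site.doc | concat: site.translated | where: 'ref', page.ref | sort: 'lang' %}
    {% for translated in page_all_langs %}
      {% if site.languages contains translated.lang %}
        <a href="{{ translated.url }}">{{ translated.lang }}</a>
      {% endif %}
    {% endfor %}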
diff --git a/_includes/team.html b/_includes/team.html
index 06c3fef8..c3fea148 100644
--- a/_includes/team.html
+++ b/_includes/team.html
@@ -6,7 +6,7 @@
 {% assign emeritus = team-page | where_exp: "item", "item.htmlsection == 'emeritus'" | first %}
 {% assign community = team-page | where_exp: "item", "item.htmlsection == 'community'" | first %}
 {% assign team_link = lang | append: "/team/" %}
-{% assign teams = site.pages | concat: site.qubes-translated | where:'permalink', team_link %}
+{% assign teams = site.pages | concat: site.translated | where:'permalink', team_link %}
 {% if teams.size == 0 %}
 {% assign team_link = "/team/" %}
 {% endif %}
diff --git a/_translated b/_translated
new file mode 160000
index 00000000..03e12c91
--- /dev/null
+++ b/_translated
@@ -0,0 +1 @@
+Subproject commit 03e12c911da1c0a122db5373fddf11e37c282d3f
diff --git a/_utils/_translation_utils/COUNTER.txt b/_utils/_translation_utils/COUNTER.txt
new file mode 100644
index 00000000..f37b177f
--- /dev/null
+++ b/_utils/_translation_utils/COUNTER.txt
@@ -0,0 +1 @@
+current counter: 251
diff --git a/_utils/_translation_utils/check_all_langs.sh b/_utils/_translation_utils/check_all_langs.sh
new file mode 100644
index 00000000..3755042c
--- /dev/null
+++ b/_utils/_translation_utils/check_all_langs.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# to be run from the git root
+# $1 is the directory where the translated files reside and the language needs to be added to internal urls
+# TODO param check
+
+set -e
+
+echo "================================= build site =================================="
+#read b
+bundle exec jekyll b
+
+all_ok=true
+echo "================================= run htmlproofer ==============================="
+htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/.*/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug &> /tmp/html.output || all_ok=false
+
+# exit here if all is ok
+if $all_ok; then
+    echo 'All checks passed!'
+    exit
+fi
+
+echo "================================== as a last resort in case of errors process html proofer errors ================================="
+python3 _utils/_translation_utils/postprocess_htmlproofer.py /tmp/html.output "$1"
+
+echo "================================= build the site and run htmlproofer ===================================="
+rm -rf ./_site/
+bundle exec jekyll b
+htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/.*/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug
diff --git a/_utils/_translation_utils/merge_md_heading_ids.py b/_utils/_translation_utils/merge_md_heading_ids.py
new file mode 100644
index 00000000..df82153b
--- /dev/null
+++ b/_utils/_translation_utils/merge_md_heading_ids.py
@@ -0,0 +1,213 @@
+#!/usr/bin/python3
+# This is a script provided by TokiDev
+# https://github.com/tokideveloper/langswitch-prototype/blob/master/_utils/merge_md_heading_ids.py
+
+import sys
+import re
+import subprocess
+
+
+def get_yaml_front_matter(gfm_lines):
+    counter = 0
+    start = 0
+    end = 0
+    for i in range(len(gfm_lines)):
+        if gfm_lines[i] == '---\n':
+            counter += 1
+            if counter == 1:
+                start = i
+            elif counter == 2:
+                end = i + 1
+                return gfm_lines[start:end], start, end
+    if counter == 1:
+        return gfm_lines[start:], start, len(gfm_lines)
+    # case counter == 0:
+    return [], 0, 0
+
+
+
+def line_only_made_of(line, char):
+    length = len(line)
+    for i in range(length - 1):
+        if line[i] != char:
+            return False
+    return line[length - 1] == '\n'
+
+
+
+def render(gfm_lines):
+    p = subprocess.run(['kramdown'], stdout=subprocess.PIPE, input=''.join(gfm_lines), encoding='utf8')
+    if p.returncode != 0:
+        return None
+    return p.stdout.splitlines(1)
+
+
+
+def look_for_headline(rendered_html_lines, headline_id):
+    for l in range(len(rendered_html_lines)):
+        x = re.search('<h[1-6] id="' + headline_id + '">', rendered_html_lines[l])
+        if x is None:
+            continue
+        c = x.start()
+        if c is None:
+            continue
+        else:
+            return l, c
+    return None
+
+
+
+def extract_headline_id(rendered_html_lines, l, c):
+    line = rendered_html_lines[l]
+    line = line[c:]
+    x = re.search('<h[1-6] id="', line)
+    if x is None or x.start() > 0:
+        return None
+    span = x.span()
+    line = line[(span[1] - span[0]):]
+    end = line.find('"')
+    line = line[:end]
+    return line
+
+
+
+def try_create_id(gfm_lines, line_number, this_line, next_line, rendered_html_lines, placeholder):
+    # save headline
+    saved_headline = gfm_lines[line_number]
+
+    hl = None
+
+    if this_line.startswith('#'):
+        # headline starting with '#'
+        gfm_lines[line_number] = '# ' + placeholder + '\n'
+        hl = look_for_headline(render(gfm_lines), placeholder)
+    elif len(next_line) >= 3 and (line_only_made_of(next_line, '=') or line_only_made_of(next_line, '-')):
+        # headline starting with '===' or '---'
+        gfm_lines[line_number] = placeholder + '\n'
+        hl = look_for_headline(render(gfm_lines), placeholder)
+
+    # revert headline
+    gfm_lines[line_number] = saved_headline
+
+    if hl is None:
+        return None
+
+    hl_line, hl_col = hl
+    return extract_headline_id(rendered_html_lines, hl_line, hl_col)
+
+
+
+def generate_unique_placeholder(rendered_html_lines):
+    number = 0
+    PREFIX = 'xq'
+    SUFFIX = 'z'
+    result = ''
+    while True:
+        result = PREFIX + str(number) + SUFFIX
+        solution_found = True
+        for line in rendered_html_lines:
+            if result in line:
+                number += 1
+                solution_found = False
+                break
+        if solution_found:
+            break
+    # we assume that there will be at least one solution
+    return result
+
+
+
+def create_line_to_id_map(gfm_lines):
+    result = {}
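+    # work on a copy so that headlines can be temporarily swapped for a placeholder and restored afterwards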
+    gfm_lines2 = gfm_lines[:]
+    rendered_html_lines = render(gfm_lines)
+
+    placeholder = generate_unique_placeholder(rendered_html_lines)
+
+    # line-by-line: assume a headline
+    n = len(gfm_lines2)
+    for i in range(n):
+        this_line = gfm_lines2[i]
+        next_line = ''
+        if i < n - 1:
+            next_line = gfm_lines2[i + 1]
+        hid = try_create_id(gfm_lines2, i, this_line, next_line, rendered_html_lines, placeholder)
+        if hid is not None:
+            result[i] = hid
+
+    return result
+
+
+
+def insert_ids_to_gfm_file(line_to_id_map, gfm_lines):
+    result = gfm_lines[:]
+    n = len(result)
+    for key, value in line_to_id_map.items():
+        str_to_insert = '<a id="' + value + '"></a>'
+        line = result[key]
+        if line.startswith('#'):
+            if key + 1 >= n:
+                result = result + ['']
+            result[key + 1] = str_to_insert + result[key + 1]
+        else:
+            if key + 2 >= n:
+                result = result + ['']
+            result[key + 2] = str_to_insert + result[key + 2]
+    return result
+
+
+
+def merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines):
+    # assuming that both files match line by line such that matching headlines are in the same lines
+
+    # get yaml front matter from orig
+    orig_yaml_front_matter, orig_start, orig_end = get_yaml_front_matter(orig_gfm_lines)
+
+    # get yaml front matter from trl
+    trl_yaml_front_matter, trl_start, trl_end = get_yaml_front_matter(trl_gfm_lines)
+
+    # get body from trl
+    trl_body = trl_gfm_lines[trl_end:]
+
+    # get body from orig
+    orig_body = orig_gfm_lines[orig_end:]
+
+    # create line-to-id map
+    orig_line_to_id_map = create_line_to_id_map(orig_body)
+
+    # insert ids
+    preresult = insert_ids_to_gfm_file(orig_line_to_id_map, trl_body)
+
+    # create translated document with adapted body
+    result_trl_gfm = ''.join(trl_yaml_front_matter) + ''.join(preresult)
+
+    return result_trl_gfm
+
+
+def write_lines(content, filename):
+    with open(filename,'w') as f:
+        f.write(content)
+
+def read_lines(filename):
+    with open(filename, 'r') as f:
+        lines = f.readlines()
+    return lines
+
+def process_headers(mapping):
+
+    for key, item in mapping.items():
+        if not item.endswith('.yml'):
+            original_lines = read_lines(key)
+            translated_lines = read_lines(item)
+            # merge ids in gfm files
+            print(key)
+
+            result = merge_ids_in_gfm_files(original_lines, translated_lines)
+            write_lines(result, item)
+
diff --git a/_utils/_translation_utils/merge_md_heading_ids.rb b/_utils/_translation_utils/merge_md_heading_ids.rb
new file mode 100644
index 00000000..1f59df76
--- /dev/null
+++ b/_utils/_translation_utils/merge_md_heading_ids.rb
@@ -0,0 +1,335 @@
+#!/usr/bin/env ruby
+
+require 'kramdown'
+
+
+
+YamlFrontMatter = Struct.new(:yaml_lines, :startl, :endl)
+
+def get_yaml_front_matter(gfm_lines)
+  counter = 0
+  startl = 0
+  endl = 0
+  for i in 0..(gfm_lines.length - 1)
+    if gfm_lines[i] == "---\n"
+      counter += 1
+      if counter == 1
+        startl = i
+      elsif counter == 2
+        endl = i + 1
+        result = YamlFrontMatter.new
+        result.yaml_lines = gfm_lines[startl..(endl - 1)]
+        result.startl = startl
+        result.endl = endl
+        return result
+      end
+    end
+  end
+  if counter == 1
+    result = YamlFrontMatter.new
+    result.yaml_lines = gfm_lines[startl..-1]
+    result.startl = startl
+    result.endl = gfm_lines.length
+    return result
+  end
+  # case counter == 0:
+  result = YamlFrontMatter.new
+  result.yaml_lines = []
+  result.startl = 0
+  result.endl = 0
+  return result
+end
+
+
+
+def line_only_made_of(line, char)
+  length = line.length
+  for i in 0..(length - 2)
+    if line[i] != char
+      return false
+    end
+  end
+  return line[length - 1] == "\n"
+end
+
+
+
+def render(gfm_lines)
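+  # render the Markdown with Kramdown and return the HTML as an array of lines
+  # (the Python counterpart shells out to the kramdown CLI instead)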
+  Kramdown::Document.new(gfm_lines.join).to_html.lines
+end
+
+
+
+LineColumn = Struct.new(:l, :c)
+
+def look_for_headline(rendered_html_lines, headline_id)
+  for l in 0..(rendered_html_lines.length - 1)
+    m = rendered_html_lines[l].scan(/<h[1-6] id="#{headline_id}">/)
+    if m.length > 0
+      c = rendered_html_lines[l].index(m[0])
+      result = LineColumn.new
+      result.l = l
+      result.c = c
+      return result
+    end
+  end
+  return nil
+end
+
+
+
+def extract_headline_id(rendered_html_lines, l, c)
+  line = rendered_html_lines[l]
+  line = line[c..-1]
+  m = line.scan(/<h[1-6] id="/)
+  if m.length == 0 or line.index(m[0]) != 0
+    return nil
+  end
+  line = line[m[0].length..-1]
+  endq = line.index('"')
+  return line[0..(endq - 1)]
+end
+
+
+
+def try_get_headline_column_and_line(gfm_lines, line_number, placeholder)
+  # save headline
+  saved_headline = gfm_lines[line_number]
+
+  this_line = gfm_lines[line_number]
+  next_line = ''
+  if line_number < gfm_lines.length - 1
+    next_line = gfm_lines[line_number + 1]
+  end
+
+  hl = nil
+
+  if this_line.start_with?('#')
+    # headline starting with '#'
+    gfm_lines[line_number] = '# ' + placeholder + "\n"
+    hl = look_for_headline(render(gfm_lines), placeholder)
+  elsif next_line.length >= 3 and (line_only_made_of(next_line, '=') or line_only_made_of(next_line, '-'))
+    # headline starting with '===' or '---'
+    gfm_lines[line_number] = placeholder + "\n"
+    hl = look_for_headline(render(gfm_lines), placeholder)
+  end
+
+  # revert headline
+  gfm_lines[line_number] = saved_headline
+
+  return hl
+end
+
+
+
+def generate_unique_placeholder(rendered_html_lines)
+  number = 0
+  prefix = 'xq'
+  suffix = 'z'
+  result = ''
+  while true do
+    result = prefix + number.to_s + suffix
+    solution_found = true
+    for line in rendered_html_lines
+      if line.include? result
+        number += 1
+        solution_found = false
+        break
+      end
+    end
+    if solution_found
+      break
+    end
+  end
+  # we assume that there will be at least one solution
+  return result
+end
+
+
+
+def create_id_list(gfm_lines)
+  result = []
+  gfm_lines2 = gfm_lines[0..-1]
+  rendered_html_lines = render(gfm_lines)
+
+  placeholder = generate_unique_placeholder(rendered_html_lines)
+
+  # line-by-line: assume a headline
+  n = gfm_lines2.length
+  for line_number in 0..(n - 1)
+    hl = try_get_headline_column_and_line(gfm_lines2, line_number, placeholder)
+    if hl != nil
+      hid = extract_headline_id(rendered_html_lines, hl.l, hl.c)
+      result = result + [hid]
+    end
+  end
+  return result
+end
+
+
+
+def is_a_headline(gfm_lines, line_number, placeholder)
+  return try_get_headline_column_and_line(gfm_lines, line_number, placeholder) != nil
+end
+
+
+
+def insert_ids_into_gfm_file(id_list, gfm_lines)
+  result = gfm_lines[0..-1]
+  if id_list.length == 0
+    return result
+  end
+  n = result.length
+  rendered_html_lines = render(gfm_lines)
+  placeholder = generate_unique_placeholder(rendered_html_lines)
+  id_index = 0
+
+  for line_number in 0..(gfm_lines.length - 1)
+    if is_a_headline(gfm_lines, line_number, placeholder)
+      id = id_list[id_index]
+      if id != nil
+        str_to_insert = '<a id="' + id + '"></a>' + "\n"
+        line = result[line_number]
+        if !line.nil? and line.start_with?('#')
+          if line_number + 1 >= n
+            result = result + ['']
+          end
+          result[line_number + 1] = str_to_insert.to_s + result[line_number + 1].to_s
+        else
+          if line_number + 2 >= n
+            result = result + ['']
+          end
+          result[line_number + 2] = str_to_insert.to_s + result[line_number + 2].to_s
+        end
+      end
+      id_index += 1
+      if id_index >= id_list.length
+        break
+      end
+    end
+  end
+  return result
+end
+
+
+
+def merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines)
+  # assuming that both files match line by line such that matching headlines are in the same lines
+
+  # get yaml front matter from orig
+  orig_yfm = get_yaml_front_matter(orig_gfm_lines)
+  orig_yaml_front_matter = orig_yfm.yaml_lines
+  orig_start = orig_yfm.startl
+  orig_end = orig_yfm.endl
+
+  # get yaml front matter from trl
+  trl_yfm = get_yaml_front_matter(trl_gfm_lines)
+  trl_yaml_front_matter = trl_yfm.yaml_lines
+  trl_start = trl_yfm.startl
+  trl_end = trl_yfm.endl
+
+  # get body from trl
+  trl_body = trl_gfm_lines[trl_end..-1]
+
+  # get body from orig
+  orig_body = orig_gfm_lines[orig_end..-1]
+
+  # create id list
+  orig_id_list = create_id_list(orig_body)
+
+  # insert ids
+  preresult = insert_ids_into_gfm_file(orig_id_list, trl_body)
+
+  # create translated document with adapted body
+  result_trl_gfm = trl_yaml_front_matter.join + preresult.join
+
+  return result_trl_gfm
+end
+
+def create_dict_from_tx_config(lang, mappingfile)
+  # read a tx config file containing only file_filter and source_file information, store it in a dict and give it back
+  # mappingfile: a tx config file containing only file_filter and source_file information
+  # return: a dict containing a mapping between an original file and its downloaded tx translation
+  mapping = {}
+
+  lines = []
+  lines = read_file(mappingfile)
+
+  translated = []
+  source = []
+  n = lines.length
+  idx = 0
+  while idx < n do
+    if idx + 1 >= n then
+      break
+    end
+    t = lines[idx].split('file_filter =')[1].strip
+    s = lines[idx+1].split('source_file =')[1].strip
+    translated += ["./" + t.gsub("<lang>", lang)]
+    source += ["./" + s]
+    idx += 2
+  end
+
+  n = translated.length
+  idx = 0
+  while idx < n do
+    mapping[source[idx]] = translated[idx]
+    idx += 1
+  end
+
+  return mapping
+end
+
+def read_file(filename)
+  read_lines = []
+  File.open(filename, "r") do |f|
+    f.each_line do |line|
+      read_lines += [line]
+    end
+  end
+  return read_lines
+end
+
+def write_file(contents, filename)
+  File.open(filename, "w") do |f|
+    f.write(contents)
+  end
+end
+
+def main()
+  if ARGV.length != 2
+    exit(1)
+  end
+
+  mapping = create_dict_from_tx_config(ARGV[0], ARGV[1])
+  mapping.each do |key, value|
+    if !key.end_with?(".yml")
+      orig_gfm_lines = read_file(key)
+      trl_gfm_lines = read_file(value)
+      # merge ids in gfm files
+      result = merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines)
+      write_file(result, value)
+    end
+  end
+
+end
+
+
+if __FILE__ == $0
+  main()
+
+  # --- for debugging
+  # orig_gfm_lines = read_file(ARGV[0])
+  # trl_gfm_lines = read_file(ARGV[1])
+  # result = merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines)
+  # write_file(result, '/dev/stdout')
+end
+
diff --git a/_utils/_translation_utils/post_transifex_pull.sh b/_utils/_translation_utils/post_transifex_pull.sh
new file mode 100644
index 00000000..9e6f6f56
--- /dev/null
+++ b/_utils/_translation_utils/post_transifex_pull.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# to be run from the git root
+# $1 is lang
+# $2 is the directory where the translated files reside and the language needs to be added to internal urls
+# TODO param check
+
+set -e
+
+echo "============================ post processing step 1 ======================================"
+#read b
+bash _utils/_translation_utils/prepare_tx_config_postprocess.sh .tx/config /tmp/tx-mapping
+
+echo "============================ remove obsolete files ======================================="
+python3 _utils/_translation_utils/remove_obsolete_files.py "$1" "$2" /tmp/tx-mapping
+
+echo "============================ post processing step 2 ======================================"
+#read b
+ruby _utils/_translation_utils/merge_md_heading_ids.rb "$1" /tmp/tx-mapping
+
+echo "============================ post processing step 3 ======================================"
+#read b
+python3 _utils/_translation_utils/postprocess_translation.py "$1" "$2" /tmp/tx-mapping /tmp/translated_href_urls.txt --yml
+
+
+echo "============================ post processing step 4 ======================================"
+#read b
+bash _utils/_translation_utils/postprocess_translation.sh "$1" "$2" /tmp/translated_href_urls.txt
diff --git a/_utils/_translation_utils/postprocess_htmlproofer.py b/_utils/_translation_utils/postprocess_htmlproofer.py
new file mode 100644
index 00000000..e73d98d9
--- /dev/null
+++ b/_utils/_translation_utils/postprocess_htmlproofer.py
@@ -0,0 +1,306 @@
+#!/usr/bin/python3
+'''
+invoke: python _utils/_translation_utils/postprocess_htmlproofer.py /tmp/html.output _translated/de/
+[/tmp/html.output]: output from htmlproofer
+[_translated/de/]: the directory with the downloaded translated files from transifex
+'''
+from frontmatter import Post, load, dump
+import yaml
+from io import open as iopen
+from re import search
+from sys import exit
+import sys
+from os import linesep, walk, environ
+from argparse import ArgumentParser
+from os.path import isfile, isdir
+from json import loads, dumps
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+
+SLASH = '/'
+# markdown frontmatter keys
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+TRANSLATED_LANGS = ['de']
+if 'TRANSLATED_LANGS' in environ:
+    TRANSLATED_LANGS = environ['TRANSLATED_LANGS'].split()
+URL_KEY = 'url'
+
+
+basicConfig(level=DEBUG)
+logger = getLogger(__name__)
+LOG_FILE_NAME='/tmp/postprocess_htmlproofer.log'
+
+def configure_logging(logname):
+    handler = FileHandler(logname)
+    handler.setLevel(DEBUG)
+    formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+def log_debug(name, data):
+    logger.debug('############################################')
+    logger.debug('############################################')
+    logger.debug('###\t'+ name.capitalize() + '\t###')
+    logger.debug('--------------------------------------------')
+    if isinstance(data,dict):
+        logger.debug(dumps(data, indent=4))
+    else:
+        logger.debug(data)
+    logger.debug('############################################')
+    logger.debug('############################################')
+
+def get_new_line(line, internal_link, internal_links, permalink):
+    if internal_link in internal_links and internal_link.startswith("/"):
+        # TODO redundant
+        indd = internal_link.find('#')
+        internal_link_to_replace = internal_link[0:indd]
+        to_replace = line.replace(internal_link, internal_link_to_replace)
+        return to_replace
+    elif internal_link in internal_links and internal_link.startswith("#"):
+        to_replace = line.replace(internal_link, permalink)
+        return to_replace
+    else:
+        return line
+        #return None
+
+
+def process_markdown(translated_file, internal_links):
+    """
+    for every translated file, discard the erroneous internal links
+    translated_file: the file marked and uploaded to Transifex for translation; if it was not downloaded, a debug message is printed
+    internal_links: all internal links belonging to the translated_file that are erroneous according to htmlproofer
+    """
+    mdt = Post
+    try:
+        with iopen(translated_file) as t:
+            mdt = load(t)
+            lines = []
+            headings = []
+            permalink = mdt.get(PERMALINK_KEY)
+            if permalink == None:
+                permalink = '/'
+            for line in mdt.content.splitlines():
+                # gather information
+                inst = {}
+                if line.startswith("[") and "]:" in line:
+                    s = line.find(":")
+                    internal_link = line[s+1:len(line)].strip()
+                    if internal_link in internal_links and internal_link.startswith("/"):
+                        ind = line.rfind('#')
+                        to_replace = line[0:ind]
+                        lines.append(to_replace)
+                        continue
+                    if internal_link in internal_links and internal_link.startswith("#"):
+                        to_replace = line.replace(internal_link, permalink)
+                        lines.append(to_replace)
+                        continue
+
+                if "[" in line and "](" in line and ")" in line:
+                    count = line.count('](')
+                    tmp = line
+                    val = 0
+                    for i in range(0, count):
+                        s = line.find("](", val)
+                        e = line.find(")", s + 1)
+                        internal_link = line[s+2:e].strip().replace(')','')
+
+                        line = get_new_line(line, internal_link, internal_links, permalink)
+                        val = val + s + 1
+                    lines.append(line)
+                    continue
+                lines.append(line)
+
+            mdt.content = linesep.join(lines) + '\n'
+
+            with iopen(translated_file, 'wb') as replaced:
+                dump(mdt, replaced)
+
+    except FileNotFoundError as e:
+        logger.debug('Following file was not updated/downloaded from transifex: %s' % e.filename)
+
+
+
+def get_all_translated_permalinks_and_redirects_to_file_mapping(translated_dir):
+    """
+    traverse the already updated (via tx pull) root directory with all the translated files
+    and get their permalinks and redirects
+    translated_dir: root directory with all the translated files
+    return: set holding the translated permalinks and redirects
+    """
+    mapping = {}
+    perms = []
+    yml_files = []
+    for dirname, subdirlist, filelist in walk(translated_dir):
+        if dirname[0] == '.':
+            continue
+        for filename in filelist:
+            if filename[0] == '.':
+                continue
+            filepath = dirname + SLASH + filename
+            md = Post
+            with iopen(filepath) as fp:
+                md = load(fp)
+                if md.get(PERMALINK_KEY) != None:
+                    perms.append(md.get(PERMALINK_KEY))
+                elif filepath.endswith('.yml'):
+                    yml_files.append(filepath)
+                else:
+                    logger.error('no permalink in frontmatter for file %s' % filename)
+                redirects = md.get(REDIRECT_KEY)
+                if redirects != None:
+                    if isinstance(redirects,list):
+                        for r in redirects:
+                            perms.append(r)
+                    elif isinstance(redirects,str):
+                        perms.append(redirects)
+                    else:
+                        logger.error('ERROR: unexpected in redirect_from: %s' % redirects)
+                        exit(1)
+                else:
+                    logger.debug('no redirect_from in frontmatter for file %s' % filepath)
+            mapping[filepath] = perms
+            perms = []
+    return mapping, yml_files
+
+
+# TODO simplify
+def get_error_output_from_htmlproofer(htmlproofer_output):
+    errors_tmp = []
+    with iopen(htmlproofer_output,'r') as h:
+        lines = h.readlines()
+        errors_tmp = [x for x in lines if not(x.startswith('Checking') or x.startswith('Ran') or x.startswith('Running') or x.startswith('\n') or x.startswith('htmlproofer'))]
+
+    count = 0
+    errors = {}
+    internal_link = []
+    u = ''
+    pattern = 'a href='
+    for i in range(len(errors_tmp)):
+        if pattern in errors_tmp[i]:
+            i1 = errors_tmp[i].find(pattern, 0)
+            i2 = errors_tmp[i].find('"', i1 + len(pattern))
+            i3 = errors_tmp[i].find('"', i2 +1 )
+
+            i_l = errors_tmp[i][i2+1:i3]
+            if '">' in i_l:
+                i_l = search('(.*)">', i_l).group(1)
+            internal_link.append(i_l)
+            count += 1
+        if './_site' in errors_tmp[i]:
+            if count > 0:
+                errors[u] = internal_link
+                internal_link = []
+            u = search('./_site(.*)index.html',errors_tmp[i]).group(1)
+            count = 0
+    errors[u] = internal_link
+    return errors
+
+def replace_url(to_replace, errorlinks):
+    """
+    recursively remove the heading fragment from the URL in a yaml file.
+    to_replace: the translated yaml content as a dictionary
+    errorlinks: all internal links that are dead ends and need to be cut off before '#', i.e., their heading fragments are dropped
+    """
+    if not isinstance(to_replace,dict):
+        return
+    for (k_r, v_r) in to_replace.items():
+        if isinstance(v_r, list):
+            for i in v_r:
+                replace_url(i, errorlinks)
+        elif URL_KEY == k_r:
+            val = to_replace[k_r]
+            if val is not None and '#' in val:
+                tmp_val = val[0:val.find('#')]
+                to_replace[URL_KEY]= tmp_val if (val in errorlinks) else val
+
+def process_yml(translated, errorlinks):
+    """
+    for the given translated yml file, cut off the heading fragment from every url that htmlproofer reported as broken;
+    all other urls are kept as they are
+    translated: translated yml file
+    errorlinks: all internal links that are dead ends and need to be cut off before '#', i.e., their heading fragments are dropped
+    """
+    docs = []
+    try:
+        with iopen(translated) as tp:
+            docs = yaml.safe_load(tp)
+            if docs == None:
+                logger.error("Empty translated file %s" %translated)
+                exit(1)
+            for a in docs:
+                replace_url(a, errorlinks)
+    except FileNotFoundError as e:
+        logger.debug('Following file was NOT updated/downloaded from transifex: %s' % e.filename)
+
+    try:
+        if len(docs)>0:
+            with iopen(translated, 'w') as replace:
+                yaml.dump(docs, replace, sort_keys=False)
+    except FileNotFoundError as e:
+        logger.debug('do nothing for file: %s. it is OK.' % e.filename)
+
+if __name__ == '__main__':
+    # python _utils/_translation_utils/postprocess_htmlproofer.py /tmp/html.output _translated/de/
+    parser = ArgumentParser()
+    # the file containing the output of htmlproofer
+    parser.add_argument("htmlproofer_output")
+    # the directory containing the translated (downloaded via tx pull) files
+    parser.add_argument("translated_dir")
+    args = parser.parse_args()
+
+    configure_logging(LOG_FILE_NAME)
+
+
+    if not isdir(args.translated_dir):
+        print("please check your translated directory")
+        logger.error("please check your translated directory")
+        exit(1)
+
+    if not isfile(args.htmlproofer_output):
+        print("please check your html proofer output file")
+        logger.error("please check your html proofer output file")
+        sys.exit(1)
+
+    errors = get_error_output_from_htmlproofer(args.htmlproofer_output)
+
+    if not errors:
+        print("nothing to do, no errors to postprocess")
+        sys.exit(1)
+
+    logger.debug("------------------------------------------------")
+    logger.debug("------------------------------------------------")
+    logger.debug("------------------------------------------------")
+    logger.debug("-------------HTML PROOFER ERRORS----------------")
+    logger.debug("------------------------------------------------")
+    logger.debug("------------------------------------------------")
+
+
+    error_links = list(sorted({el for val in errors.values() for el in val}))
+    log_debug("HTML ERRORS", errors)
+    log_debug("HTML ERRORS", error_links)
+    logger.debug("------------------------------------------------")
+    logger.debug("------------------------------------------------")
+    logger.debug("------------------------------------------------")
+
+    mapping, yml_files = get_all_translated_permalinks_and_redirects_to_file_mapping(args.translated_dir)
+
+
+    log_debug('mapping ', mapping)
+    log_debug('yml files ', yml_files)
+
+    file_to_internal_links = {}
+    for key, item in mapping.items():
+        for k, i in errors.items():
+            if k in item:
+                file_to_internal_links[key] = i
+
+    log_debug(" file to internal links mapping", file_to_internal_links)
+    for key, item in file_to_internal_links.items():
+        process_markdown(key, item)
+
+    # traverse all yml data files and cut the translated urls if they are in error_urls
+    for yml in yml_files:
+        process_yml(yml, error_links)
+
diff --git a/_utils/_translation_utils/postprocess_translation.py b/_utils/_translation_utils/postprocess_translation.py
new file mode 100644
index 00000000..db12a6ba
--- /dev/null
+++ b/_utils/_translation_utils/postprocess_translation.py
@@ -0,0 +1,450 @@
+#!/usr/bin/python3
+# adds the language pattern to the permalink line and to all found relative links in the currently open file, recursively from a given root dir
+# invoke like: python _utils/postprocess_translation.py de _translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml
+#param1 is the language in short form
+#param2 is the root translated dir
+#param3 is the current transifex mapping between original and translated files (the output of the prepare_tx_config_postprocess.sh script) in the format:
+# file_filter=
+# source_file=
+#param4 is the name for the file containing all the permalinks of the files translated/downloaded via the tx client;
it is afterwards used by the postprocess_translation.sh script
+#param5 is optional, indicating that .yml files (as in the _data directory, with no frontmatter whatsoever) are to be processed
+
+from yaml import safe_load
+from yaml import dump as ydump
+import frontmatter
+from io import open as iopen
+from os.path import isfile, isdir
+from os import linesep, walk, environ
+from re import findall
+from sys import exit
+from argparse import ArgumentParser
+from json import loads, dumps
+from collections import deque
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+patterns = (
+    "](/",
+    "]: /",
+    "href=\"/",
+    "url: /",
+    "href=\'/",
+)
+# TODO simplify the if condition with a list of omitted url patterns
+news = "/news/"
+qubes_issues = "/qubes-issues/"
+# constants and such
+# yml keys:
+YML_KEYS = ['url', 'topic', 'title', 'category', 'folder', 'htmlsection', 'tweet', 'avatar', 'img',
+            'article', 'quote', 'name', 'occupation', 'author', 'more', 'text',
+            'video', 'intro', 'version', 'subtitle', 'download', 'security', 'bug', 'help',
+            'join', 'partner', 'cert', 'picture', 'email', 'website', 'mail', 'links', 'id',
+            'paragraph', 'snippet', 'column', 'hover', 'digest', 'signature', 'pgp', 'green', 'red', 'blue', 'trump',
+            'tts1', 'tts2', 'txp', 'txaq', 'pxaq', 'column1', 'column2', 'column3', 'yes_short', 'no_short', 'no_extended', 'tba',
+            'bold', 'item', 'note', 'section', 'row', 'r_version',
+            'go', 'search', 'metatopic', 'ddg', 'hover']
+URL_KEY = 'url'
+# md frontmatter keys:
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+REDIRECT_TO = 'redirect_to'
+LANG_KEY = 'lang'
+TRANSLATED_KEY = 'translated'
+LAYOUT_KEY = 'layout'
+SLASH = '/'
+MD_URL_SPLIT_PATTERNS = ['/)','/#']
+TRANSLATED_LANGS = ['de']
+if 'TRANSLATED_LANGS' in environ:
+    TRANSLATED_LANGS = environ['TRANSLATED_LANGS'].split()
+#EXCLUDE_FILES = ['download.md' ]
+
+
+basicConfig(level=DEBUG)
+logger = getLogger(__name__)
+LOG_FILENAME='/tmp/postprocess_translation.log'
+
+def configure_logging(logname):
+    handler = FileHandler(logname)
+    handler.setLevel(DEBUG)
+    formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+def log_debug(name, data):
+    logger.debug('############################################')
+    logger.debug('############################################')
+    logger.debug('###\t'+ name.capitalize() + '\t###')
+    logger.debug('--------------------------------------------')
+    if isinstance(data,dict):
+        logger.debug(dumps(data, indent=4))
+    else:
+        logger.debug(data)
+    logger.debug('############################################')
+    logger.debug('############################################')
+
+
+def write_to_file(filename, lines):
+    """
+    write the given data structure to a file
+    filename: the name of the file to be written to
+    lines: the content
+    """
+    with iopen(filename,'w') as c:
+        c.write('\n'.join(str(line) for line in lines))
+        c.truncate()
+
+def process_markdown(source_file, translated_file, permalinks, lang):
+    """
+    for every markdown file uploaded for translation via the tx client, replace its frontmatter with the
+    frontmatter of the original file, set the specific language, and, for all files/permalinks
+    downloaded/updated via Transifex, add the specific language to the internal urls
+    source_file: original file
+    translated_file: the file marked and uploaded to Transifex for translation; if it was not downloaded, a debug message is printed
+    permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
+    lang: the translation language
+    """
+    mdt = frontmatter.Post
+    try:
+        with iopen(source_file) as s, iopen(translated_file) as t:
+            mds = frontmatter.load(s)
+            mdt = frontmatter.load(t)
+            if mds.get(PERMALINK_KEY) != None:
+                mdt[PERMALINK_KEY] = SLASH + lang + mds.get(PERMALINK_KEY)
+            elif PERMALINK_KEY in mdt:
+                # if missing in source, remove from translated too
+                del mdt[PERMALINK_KEY]
+
+            if mds.get(REDIRECT_KEY) != None:
+                redirects = mds.get(REDIRECT_KEY)
+                if isinstance(redirects, str):
+                    redirects = [redirects]
+                # just in case
+                if any('..' in elem for elem in redirects):
+                    logger.error('\'..\' found in redirect_from in file %s' % source_file)
+                    exit(1)
+                mdt[REDIRECT_KEY] = [(SLASH + lang + elem.replace('/en/', SLASH) if not elem.startswith(SLASH + lang + SLASH) else elem)
+                                     for elem in redirects]
+
+                if mds.get(PERMALINK_KEY) != None and mds[PERMALINK_KEY] in mdt[REDIRECT_KEY]:
+                    mdt[REDIRECT_KEY].remove(mds[PERMALINK_KEY])
+                if mdt.get(PERMALINK_KEY) != None and mdt[PERMALINK_KEY] in mdt[REDIRECT_KEY]:
+                    mdt[REDIRECT_KEY].remove(mdt[PERMALINK_KEY])
+
+                tmp = sorted(set(mdt[REDIRECT_KEY]))
+                mdt[REDIRECT_KEY] = tmp
+            elif REDIRECT_KEY in mdt:
+                # if missing in source, remove from translated too
+                del mdt[REDIRECT_KEY]
+
+            if mds.get(LAYOUT_KEY) != None:
+                mdt[LAYOUT_KEY] = mds[LAYOUT_KEY]
+
+            if mds.get(REDIRECT_TO) != None:
+                redirect = mds.get(REDIRECT_TO)
+                if isinstance(redirect, list):
+                    redirect = redirect[0]
+                if redirect.startswith('/') and not redirect.startswith(SLASH + lang + SLASH) and not redirect.startswith(news):
+                    mdt[REDIRECT_TO] = SLASH + lang + redirect
+                else:
+                    mdt[REDIRECT_TO] = redirect
+            elif REDIRECT_TO in mdt:
+                del mdt[REDIRECT_TO]
+
+            mdt[LANG_KEY] = lang
+            # TODO we do not need the translated key anymore
+            #mdt[TRANSLATED_KEY] = 'yes'
+            ## for testing purposes only
+            #if mdt.get('title') != None:
+            #    mdt['title'] = lang.upper() +"!: " + mdt.get('title')
+
+            # replace links
+            lines = []
+            for line in mdt.content.splitlines():
+                for pattern in patterns:
+                    if pattern in line:
+                        tmp = line.split(pattern)
+                        line = tmp[0]
+                        for part in range(1, len(tmp)):
+                            if '../' in tmp[part]:
+                                logger.error('\'..\' found in internal url: %s' % tmp[part])
+                                exit(1)
+
+                            # TODO we can translate news you know
+                            if not tmp[part].startswith(lang + SLASH) and \
+                               not tmp[part].startswith('news') and \
+                               not tmp[part].startswith('attachment') and \
+                               not tmp[part].startswith('qubes-issues') and \
+                               split_and_check(tmp[part],permalinks):
+                                line += pattern + lang + SLASH + tmp[part]
+                            # TODO this is the case with links at the bottom of the file
+                            elif not tmp[part].startswith(SLASH) and \
+                                 SLASH + tmp[part] in permalinks:
+                                line += pattern + lang + SLASH + tmp[part]
+                            # TODO if a url contains a language but the url belongs to a file that is not translated, should we actually remove the language -> overengineering?
+#                            elif tmp[part].startswith(lang+SLASH) and not split_and_check(tmp[part][len(lang)+1],permalinks):
+#                                line += pattern + tmp[part][len(lang)+1]
+                            else:
+                                line += pattern + tmp[part]
+                lines.append(line)
+
+            mdt.content = linesep.join(lines) + '\n'
+
+            with iopen(translated_file, 'wb') as replaced:
+                frontmatter.dump(mdt, replaced)
+
+    except FileNotFoundError as e:
+        logger.debug('Following file was not updated/downloaded from transifex: %s' % e.filename)
+
+
+
+def split_and_check(md_line, permalinks):
+    """
+    for a given markdown line containing an internal link,
+    return whether the internal link belongs to a file already downloaded and translated from transifex
+    md_line: a markdown line containing an internal link
+    permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
+    """
+    for pattern in MD_URL_SPLIT_PATTERNS:
+        if pattern in md_line:
+            sp = md_line.split(pattern)
+            t = sp[0]
+            t = SLASH + t if not t.startswith(SLASH) else t
+            t = t + SLASH if not t.endswith(SLASH) else t
+            if t in permalinks:
+                return True
+            else:
+                logger.debug("Following link: %s belongs to a file NOT translated/downloaded from transifex" %t)
+    return False
+
+def check_yml_attributes(to_replace, original):
+    """
+    recursively check that the title, folder and category attributes of the translated yaml file
+    are not empty strings
+    if they are: replace them with the original content
+    it assumes that the order between original and translated files loaded as dictionaries is preserved
+    to_replace: the translated yaml content as a dictionary
+    original: the original yaml content as a dictionary
+    """
+
+    if not (isinstance(to_replace,dict) and isinstance(original,dict)):
+        return
+    for (k_r, v_r), (k_o, v_o) in zip(to_replace.items(), original.items()):
+        if isinstance(v_r, list) and isinstance(v_o, list):
+            for i, j in zip(v_r, v_o):
+                check_yml_attributes(i, j)
+        for yml_key in YML_KEYS:
+            if yml_key == k_r and yml_key == k_o and to_replace[yml_key] == '':
+                to_replace[yml_key] = original[yml_key]
+            elif k_r != k_o:
+                logger.error("ERROR, the order of the loaded yml file is not preserved %s" % k_r +':' + k_o)
+                exit(1)
+
+
+def replace_url(to_replace, original, lang, permalinks):
+    """
+    recursively add the language to the original value of the URL key if the file with the given URL is translated, and save it to the translated yaml file.
+    if the file is not translated, keep the original url
+    it assumes that the order between original and translated files loaded as dictionaries is preserved
+    to_replace: the translated yaml content as a dictionary
+    original: the original yaml content as a dictionary
+    lang: language, for example de
+    permalinks: urls of the translated/downloaded files from transifex
+    """
+    if not (isinstance(to_replace,dict) and isinstance(original,dict)):
+        return
+    for (k_r, v_r), (k_o, v_o) in zip(to_replace.items(), original.items()):
+        if isinstance(v_r, list) and isinstance(v_o, list):
+            for i, j in zip(v_r, v_o):
+                replace_url(i, j, lang, permalinks)
+        elif URL_KEY == k_r and URL_KEY == k_o:
+            val = original[k_r]
+            if val is not None and '#' in val:
+                tmp_val = val[0:val.find('#')]
+                to_replace[URL_KEY]= SLASH + lang + val if (tmp_val in permalinks) else val
+            else:
+                to_replace[URL_KEY]= SLASH + lang + val if (val in permalinks) else val
+        elif k_r != k_o:
+            logger.error("ERROR, the order of the loaded yml file is not preserved %s" % k_r +':' + k_o)
+            exit(1)
+
+
+
+def process_yml(source, translated, lang, permalinks):
+    """
+    for every given source-translated yml file pair, add the language to the urls that belong to already translated files;
+    if not, retain the original ones
+    source: original yml file
+    translated: translated yml file
+    lang: language, for example de
+    permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
+    """
+    docs = []
+    try:
+        with iopen(source) as fp, iopen(translated) as tp:
+            docs_original = safe_load(fp)
+            docs = safe_load(tp)
+            if docs == None:
+                logger.error("Empty translated file %s" %translated)
+                exit(1)
+            for a, b in zip(docs, docs_original):
+                replace_url(a, b, lang, permalinks)
+                check_yml_attributes(a, b)
+    except FileNotFoundError as e:
+        logger.debug('Following file was NOT updated/downloaded from transifex: %s' % e.filename)
+
+    try:
+        if len(docs)>0:
+            with iopen(translated, 'w') as replace:
+                ydump(docs, replace, sort_keys=False)
+    except FileNotFoundError as e:
+        logger.debug('do nothing for file: %s. it is OK.' % e.filename)
+
+
+def get_all_the_hrefs(translated_dir):
+    """
+    traverse the already updated (via tx pull) root directory with all the translated files for a specific language
+    and get all the internal urls that are embedded in html code in an href attribute
+    translated_dir: root directory with all the translated files for a specific language
+    return: set holding all the internal urls that are embedded in html code in an href attribute
+    """
+
+    href = set()
+    reg ='(?<=href=\").*?(?=\")'
+    for dirname, subdirlist, filelist in walk(translated_dir):
+        if dirname[0] == '.':
+            continue
+        for filename in filelist:
+            if filename[0] == '.':
+                continue
+            filepath = dirname + SLASH + filename
+            try:
+                with iopen(filepath) as fp:
+                    lines = fp.readlines()
+                    for line in lines:
+                        t = findall(reg, line)
+                        if len(t)>0:
+                            for i in t:
+                                href.add(i)
+            except FileNotFoundError as e:
+                logger.error('problem opening a file in the translated dir: %s' %e.filename)
+                exit(1)
+    return href
+
+def get_all_translated_permalinks_and_redirects(translated_dir,lang):
+    """
+    traverse the already updated (via tx pull) root directory with all the translated files for a specific language
+    and get their permalinks and redirects without the specific language
+    translated_dir: root directory with all the translated files for a specific language
+    lang: the specific language
+    return: set holding the original (language code is removed) permalinks and redirects
+    """
+
+    perms = set()
+    for dirname, subdirlist, filelist in walk(translated_dir):
+        if dirname[0] == '.':
+            continue
+        for filename in filelist:
+            if filename[0] == '.':
+                continue
+            filepath = dirname + SLASH + filename
+            md = frontmatter.Post
+            with iopen(filepath) as fp:
+                md = frontmatter.load(fp)
+                if md.get(PERMALINK_KEY) != None:
+                    perms.add(md.get(PERMALINK_KEY)[len(lang)+1:] if md.get(PERMALINK_KEY).startswith(SLASH+lang +SLASH) else md.get(PERMALINK_KEY))
+                else:
+                    logger.error('no permalink in frontmatter for file %s' % filename)
+                redirects = md.get(REDIRECT_KEY)
+                if redirects != None:
+                    if isinstance(redirects,list):
+                        for r in redirects:
+                            perms.add(r[len(lang)+1:] if r.startswith(SLASH + lang + SLASH) else r)
+                    elif isinstance(redirects,str):
+                        perms.add(redirects)
+                    else:
+                        logger.error('ERROR: unexpected in redirect_from: %s' % redirects)
+                        exit(1)
+                else:
+                    logger.debug('no redirect_from in frontmatter for file %s' % filepath)
+    return perms
+
+def create_dict_from_tx_config(mappingfile, lang):
+    """
+    read a tx config file containing only file_filter and source_file information, store it in a dict and give it back
+    mappingfile: a tx config file containing only file_filter and source_file information
+    return: a dict containing a mapping between an original file and its downloaded tx translation
+    """
+    with iopen(mappingfile) as fp:
+        lines = fp.readlines()
+    translated = ['./' + x.split('file_filter =')[1].strip().replace('<lang>', lang) for i, x in enumerate(lines) if i % 2 == 0]
+    source = ['./' + x.split('source_file =')[1].strip() for i, x in enumerate(lines) if i % 2 == 1]
+    return dict(zip(source, translated))
+
+
+def main(translated_dir, lang, yml, mapping, href_filename):
+    perms = get_all_translated_permalinks_and_redirects(translated_dir, lang)
+    log_debug('all translated permalinks/redirects', perms)
+
+    hrefs = get_all_the_hrefs(translated_dir)
+
+    log_debug('all the hrefs', hrefs)
+    write_to_file(href_filename, perms.intersection(hrefs))
+
+    # for each pair of source and translated file postprocess the translated file
+    for key, item in mapping.items():
+        if yml and item.endswith('.yml'):
+            process_yml(key, item, lang, perms)
+        #if not item.endswith('.yml') and not item.endswith('downloads.md'):
+        if not item.endswith('.yml'):
+            process_markdown(key, item, perms, lang)
+
+
+
+if __name__ == '__main__':
+
+    # python _utils/postprocess_translation.py de _translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml
+    parser = ArgumentParser()
+    # for which language should we do this
+    parser.add_argument("language")
+    # the directory containing the translated (downloaded via tx pull) files
+    parser.add_argument("translateddir")
+    # provide the mappingfile from tx configuration containing the file_filter to source_file mapping
+    parser.add_argument("tx_mappingfile")
+    # name of the file to contain/write to all the internal urls that are embedded in html code in an href attribute
+    # for later processing by postprocess_translation.sh
+    parser.add_argument("translated_hrefs_filename")
+    # whether or not to process yml files
+    parser.add_argument("--yml", action='store_true')
+    args = parser.parse_args()
+
+
+    if not isfile(args.tx_mappingfile):
+        print("please check your transifex mapping file")
+        exit(1)
+
+    if not isdir(args.translateddir):
+        print("please check your translated directory")
+        exit(1)
+
+    if not args.language in TRANSLATED_LANGS:
+        print("language not in the expected translation languages")
+        exit(1)
+
+    configure_logging(LOG_FILENAME)
+
+
+    log_debug('START', {})
+
+    source_translation_mapping = create_dict_from_tx_config(args.tx_mappingfile, args.language)
+
+
+    log_debug('source/translation file mapping', source_translation_mapping)
+
+    main(args.translateddir, args.language, args.yml, source_translation_mapping, args.translated_hrefs_filename)
+
+
diff --git a/_utils/_translation_utils/postprocess_translation.sh b/_utils/_translation_utils/postprocess_translation.sh
new file mode 100644
index 00000000..f775084e
--- /dev/null
+++ b/_utils/_translation_utils/postprocess_translation.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# $1 is lang
+# $2 is the directory where the translated files reside and the language needs to be added to internal urls
+# $3 is a file with internal urls that belong to files already translated and downloaded from transifex, to be replaced with lang/url
+# where $3 is a file dumped by postprocess_translation.py
+# this script exists because it is easier to correctly process html code with sed. python messes it up.
+# example of invoking the script:
+# bash _utils/_translation_utils/postprocess_translation.sh de _translated/de/ _utils/translated_hrefs_urls.txt
+
+
+pattern="href=\"\/"
+pattern_reset="href=\"\/"$1"\/"
+escaped_slash="\/"
+
+# find the patterns that contain the href=/$lang pattern and reset them
+find $2 -name '*.md' -or -name '*.html' | xargs sed -i "s/$pattern_reset/$pattern/g"
+
+while read line; do
+    # check for traversing patterns in $3: check if every line begins with /word
+    if [ -z `grep -oP '^(/(\w+))*' <<< $line` ]
+    then
+        echo "the string does not begin as it should"
+        exit 0
+    fi
+    #escape '/' with '\/'
+    l="${line//\//$escaped_slash}"
+    search_pattern="href=\""$l"\""
+    replace_pattern="href=\"\/"$1$l"\""
+
+    # search and destroy
+    find $2 -name '*.md' -or -name '*.html' | xargs sed -i "s/$search_pattern/$replace_pattern/g"
+done < $3
+
diff --git a/_utils/_translation_utils/prepare_for_translation.py b/_utils/_translation_utils/prepare_for_translation.py
new file mode 100644
index 00000000..d35b9ac6
--- /dev/null
+++ b/_utils/_translation_utils/prepare_for_translation.py
@@ -0,0 +1,128 @@
+#!/usr/bin/python3
+'''
+this script adds lang and ref attributes (ref starting from the counter) to existing markdown files, after the permalink line, recursively from a given root dir
+invocation: python prepare_for_translation.py en _doc/ ref_counter_file
+param1 is the language in short form
+param2 is a directory or a single file
+param3 is a file containing the value of the current reference counter, with exactly one line in the form of:
+current counter: x
+'''
+from io import open as iopen
+from os.path import isfile
+import os
+from sys import exit
+from argparse import ArgumentParser
+from frontmatter import Post, load, dump
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+LANG_KEY = 'lang'
+REF_KEY = 'ref'
+FILENAME_EXTENSIONS = ['.png', '.svg', '.ico', '.jpg', '.css', '.scss', '.js', '.yml', '.sh', '.py', '.sed', '.dia', '.pdf', '.gif', '.eot', '.woff', '.ttf', '.otf', '.woff2', '.sig', '.json']
+
+def read_counter(counterfile):
+    if not isfile(counterfile):
+        print('check your files')
+        exit()
+    with iopen(counterfile,'r') as c:
+        counter_line = c.readline()
+        counter_a = counter_line.split('current counter: ')
+        return int(counter_a[1])
+
+def write_counter_to_file(counter, counterfile):
+    if not isfile(counterfile):
+        print('check your files')
+        exit()
+    with iopen(counterfile,'w') as c:
+        counter_line ='current counter: ' + str(counter)
+        c.writelines(counter_line)
+        c.truncate()
+
+def check_file_name(file_name):
+    return file_name[0] == '.' or any([file_name.endswith(t) for t in FILENAME_EXTENSIONS]) == True
+
+def check_dir_name(dir_name):
+    return dir_name[0] == '.' or '/.' in dir_name
diff --git a/_utils/_translation_utils/prepare_for_translation.py b/_utils/_translation_utils/prepare_for_translation.py
new file mode 100644
index 00000000..d35b9ac6
--- /dev/null
+++ b/_utils/_translation_utils/prepare_for_translation.py
@@ -0,0 +1,128 @@
+#!/usr/bin/python3
+'''
+this script adds a lang and a ref attribute (starting from counter) after the permalink line of existing markdown files, recursively from a given root dir
+invocation: python prepare_for_translation.py en _doc/ ref_counter_file
+param1 is the language in short form
+param2 is a directory or a single file
+param3 is a file containing the value of the current reference counter, with exactly one line of the form:
+current counter: x
+'''
+from io import open as iopen
+from os.path import isfile
+import os
+from sys import exit
+from argparse import ArgumentParser
+from frontmatter import load, dump
+
+
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+LANG_KEY = 'lang'
+REF_KEY = 'ref'
+FILENAME_EXTENSIONS = ['.png', '.svg', '.ico', '.jpg', '.css', '.scss', '.js', '.yml', '.sh', '.py', '.sed', '.dia', '.pdf', '.gif', '.eot', '.woff', '.ttf', '.otf', '.woff2', '.sig', '.json']
+
+def read_counter(counterfile):
+    if not isfile(counterfile):
+        print('check your files')
+        exit()
+    with iopen(counterfile, 'r') as c:
+        counter_line = c.readline()
+        counter_a = counter_line.split('current counter: ')
+        return int(counter_a[1])
+
+def write_counter_to_file(counter, counterfile):
+    if not isfile(counterfile):
+        print('check your files')
+        exit()
+    with iopen(counterfile, 'w') as c:
+        counter_line = 'current counter: ' + str(counter)
+        c.writelines(counter_line)
+        c.truncate()
+
+def check_file_name(file_name):
+    return file_name[0] == '.' or any(file_name.endswith(t) for t in FILENAME_EXTENSIONS)
+
+def check_dir_name(dir_name):
+    return dir_name[0] == '.' or '/.' in dir_name
+
+
+def main(root_dir, lang, counter):
+    # if this is only a file
+    if os.path.isfile(root_dir):
+        if not check_file_name(root_dir):
+            with iopen(root_dir) as fp:
+                md = load(fp)
+            if not md.metadata:
+                return counter
+            # remove the permalink from the redirects if they are a list containing it
+            if md.get(PERMALINK_KEY) is not None and md.get(REDIRECT_KEY) is not None and md[PERMALINK_KEY] in md[REDIRECT_KEY]:
+                redirects = md.get(REDIRECT_KEY)
+                if not isinstance(redirects, str):
+                    md[REDIRECT_KEY].remove(md[PERMALINK_KEY])
+            if md.get(LANG_KEY) is None:
+                md[LANG_KEY] = lang
+            if md.get(REF_KEY) is None:
+                md[REF_KEY] = counter
+                counter += 1
+            with iopen(root_dir, 'wb') as replaced:
+                dump(md, replaced)
+                replaced.write(b'\n')
+
+        return counter
+
+    for dir_name, subdir_list, file_list in os.walk(root_dir):
+        print('current directory: %s' % dir_name)
+
+        if check_dir_name(dir_name):
+            print('skipping directory %s' % dir_name)
+            continue
+
+        for file_name in file_list:
+            print('\t%s' % file_name)
+            if check_file_name(file_name):
+                print('skipping file %s' % file_name)
+                continue
+            file_path = dir_name + "/" + file_name
+            with iopen(file_path) as fp:
+                md = load(fp)
+            if not md.metadata:
+                print('no metadata in %s' % file_path)
+                continue
+            # remove the permalink from the redirects if they are a list containing it
+            if md.get(PERMALINK_KEY) is not None and md.get(REDIRECT_KEY) is not None and md[PERMALINK_KEY] in md[REDIRECT_KEY]:
+                redirects = md.get(REDIRECT_KEY)
+                if not isinstance(redirects, str):
+                    md[REDIRECT_KEY].remove(md[PERMALINK_KEY])
+            if md.get(LANG_KEY) is None:
+                md[LANG_KEY] = lang
+            if md.get(REF_KEY) is None:
+                md[REF_KEY] = counter
+                counter += 1
+
+            with iopen(file_path, 'wb') as replaced:
+                dump(md, replaced)
+                replaced.write(b'\n')
+
+    return counter
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser()
+    parser.add_argument("language")
+    parser.add_argument("directory")
+    parser.add_argument("refcounterfile")
+    args = parser.parse_args()
+
+    counter_file = args.refcounterfile
+    counter = read_counter(counter_file)
+
+    print('\n CURRENT REF COUNTER IS %s' % counter)
+    ref_counter = main(args.directory, args.language, counter)
+
+    print('\n NEW CURRENT REF COUNTER IS %s' % ref_counter)
+    write_counter_to_file(ref_counter, counter_file)
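For illustration, a small python-frontmatter session mirroring what the script writes; the page content and the counter value are made up:

```python
import frontmatter

post = frontmatter.loads(
    '---\ntitle: Installation Guide\npermalink: /doc/installation-guide/\n---\nBody text.')
if post.get('lang') is None:
    post['lang'] = 'en'
if post.get('ref') is None:
    post['ref'] = 131  # hypothetical current counter value
print(frontmatter.dumps(post))
# ---
# lang: en
# permalink: /doc/installation-guide/
# ref: 131
# title: Installation Guide
# ---
# Body text.
```

The `ref` value is what later ties an English page to its translations: every language version of the same page carries the same `ref`.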
diff --git a/_utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh b/_utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh
new file mode 100755
index 00000000..8ffbee43
--- /dev/null
+++ b/_utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# $1 is the .tx/config file
+# $2 filename to contain only the resources' names
+# $3 filename to contain only the source files' names
+sed -e '/^$/d' -e '/^s/d' -e '/^t/d' -e '/^h/d' -e '/^f/d' -e '/\[main]/d' -e 's/\[//' -e 's/\]//' -e 's/.*\.//' $1 > $2
+sed -e '/^$/d' -e '/source_lang/d' -e '/^t/d' -e '/^h/d' -e '/\[main]/d' -e '/\[/d' -e '/^f/d' $1 > $3
diff --git a/_utils/_translation_utils/prepare_tx_config_postprocess.sh b/_utils/_translation_utils/prepare_tx_config_postprocess.sh
new file mode 100755
index 00000000..8f2cf10b
--- /dev/null
+++ b/_utils/_translation_utils/prepare_tx_config_postprocess.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# $1 is the .tx/config file
+# $2 the new mapping file to be used by postprocess_translation.py
+sed -e '/^$/d' -e '/source_lang/d' -e '/^t/d' -e '/^h/d' -e '/\[main]/d' -e '/\[/d' $1 > $2
+#sed -i 's/aux\/news_strings.yml/news\/index.html/g' $2
+#sed -i 's/aux\/news_categories_strings.yml/news\/categories\/index.html/g' $2
diff --git a/_utils/_translation_utils/remove_obsolete_files.py b/_utils/_translation_utils/remove_obsolete_files.py
new file mode 100644
index 00000000..44743f8f
--- /dev/null
+++ b/_utils/_translation_utils/remove_obsolete_files.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+
+import argparse
+import os
+import sys
+
+parser = argparse.ArgumentParser()
+parser.add_argument('lang')
+parser.add_argument('translation_dir')
+parser.add_argument('tx_mapping')
+
+def main():
+    args = parser.parse_args()
+
+    valid_files = set()
+    with open(args.tx_mapping) as f_mapping:
+        for line in f_mapping.readlines():
+            if line.startswith('file_filter = '):
+                valid_files.add(line.strip().split(' = ')[1].replace('<lang>', args.lang))
+
+    if not valid_files:
+        print('No files found in {}, aborting!'.format(args.tx_mapping))
+        return 1
+
+    existing_files = set()
+    for dirpath, dirs, files in os.walk(args.translation_dir):
+        existing_files.update(os.path.join(dirpath, name) for name in files)
+
+    if not existing_files:
+        print('No files found in {}, aborting!'.format(args.translation_dir))
+        return 1
+
+    for obsolete in existing_files.difference(valid_files):
+        print('Removing {}'.format(obsolete))
+        os.unlink(obsolete)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
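The pruning above relies on the `file_filter` entries in the transifex mapping carrying a `<lang>` placeholder; a sketch of how one hypothetical mapping line expands:

```python
lang = 'de'
mapping_line = 'file_filter = _translated/<lang>/_doc/introduction/intro.md'
valid = mapping_line.strip().split(' = ')[1].replace('<lang>', lang)
print(valid)  # _translated/de/_doc/introduction/intro.md
# every file under _translated/de/ that no file_filter line expands to gets removed
```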
diff --git a/_utils/_translation_utils/requirements_notranslate.txt b/_utils/_translation_utils/requirements_notranslate.txt
new file mode 100644
index 00000000..849dd0f4
--- /dev/null
+++ b/_utils/_translation_utils/requirements_notranslate.txt
@@ -0,0 +1,10 @@
+attrs==19.3.0
+certifi==2019.11.28
+importlib-metadata==1.5.0
+jsonschema==3.2.0
+pycurl==7.43.0.5
+pyrsistent==0.15.7
+python-frontmatter==0.5.0
+PyYAML==5.3
+six==1.14.0
+zipp==3.1.0
diff --git a/_utils/_translation_utils/tag_strings_as_locked.py b/_utils/_translation_utils/tag_strings_as_locked.py
new file mode 100644
index 00000000..b4830408
--- /dev/null
+++ b/_utils/_translation_utils/tag_strings_as_locked.py
@@ -0,0 +1,399 @@
+#!/usr/bin/python3
+'''
+invoke: python tag_strings_as_locked.py tx-resource-names.txt tx-sources-filenames.txt --debug --manual
+param1: tx-resource-names.txt: the file from the tx configuration containing the resource names
+param2: tx-sources-filenames.txt: the file from the tx configuration containing only the original source filenames
+the developer api transifex token for auth is read from the TX_TOKEN environment variable
+--debug: whether or not to write debug json output
+--manual: whether or not to tag file by file, waiting for a keyboard input between files
+'''
+from pycurl import Curl, HTTP_CODE, error, WRITEFUNCTION
+from frontmatter import load
+from certifi import where
+from io import BytesIO
+from io import open as iopen
+from os import environ
+from os.path import isfile
+from re import match
+import sys
+from sys import exit
+from argparse import ArgumentParser
+from json import loads, dumps
+from jsonschema import validate
+from jsonschema.exceptions import ValidationError
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+# TODO should we also mark notranslate strings as reviewed? that may need manual labor afterwards though
+# Note: the patterns below should be extended if the data files are altered and there are parts that
+# have to stay the same. Candidates can be found by fetching the strings from the tx api via curl:
+# curl -i -L --user api:XXXXXXXXXXXXXXX -X GET https://www.transifex.com/api/2/project/qubes/resource/no_html_data_hcl/translation/en/strings/
+# and searching for the key patterns that should be marked as locked and thus untranslatable and immutable
+
+KEY_REGEX_LOCK_PATTERNS = ['^\[(\d)*\](.sub-pages.)\[(\d)*\](.url)$',
+    '^\[(\d)*\](.sub-pages.)\[(\d)*\](.sub-pages.)\[(\d)*\](.url)$',
+    '^\[(\d)*\](.sub-pages.)\[(\d)*\](.icon)$',
+    '^(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.icon)$', '^(\[(\d)*\])(.category)$',
+    '^(\[(\d)*\])(.tech.)(\[(\d)*\])(.img)$', '^(\[(\d)*\])(.tech.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.award.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.award.)(\[(\d)*\])(.img)$',
+    '^(\[(\d)*\])(.media.)(\[(\d)*\])(.img)$', '^(\[(\d)*\])(.media.)(\[(\d)*\])(.article)$',
+    '^(\[(\d)*\])(.attachment)$', '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.tweet)$',
+    '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.avatar)$', '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.img)$',
+    '^(\[(\d)*\])(.htmlsection)$', '^(\[(\d)*\])(.folder)$', 'redirect_from.\[(\d)*\]',
+    '^(\[(\d)*\])(.links.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.links.)(\[(\d)*\])(.id)$',
+    '^(\[(\d)*\])(.columns.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.htmlsections.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.id)$', '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.img)$',
+    '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.paragraphs.)(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.list.)(\[(\d)*\])(.item.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.releases.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.note.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.htmlsections.)(\[(\d)*\])(.htmlsection)$',
+    '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.section.)(\[(\d)*\])(.url)$',
+    '^(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+    '^categories.(\[(\d)*\])(.slug)$', '^papers.(\[(\d)*\])(.title)$', '^papers.(\[(\d)*\])(.author)$', '^papers.(\[(\d)*\])(.url)$', '^papers.(\[(\d)*\])(.category)$',
+    '^(\[(\d)*\])(.name)$', '^(\[(\d)*\])(.type)$', '^(\[(\d)*\])(.picture)$', '^(\[(\d)*\])(.email)$', '^(\[(\d)*\])(.fingerprint)$', '^(\[(\d)*\])(.github)$', '^(\[(\d)*\])(.website)$',
+    '^(\[(\d)*\])(.section.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.section)$',
+    '^(\[(\d)*\])(.releases.)(\[(\d)*\])(.r_version)$', '^(\[(\d)*\])(.section.)(\[(\d)*\])(.note)$']
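To show what these key patterns catch, a minimal check against one of them; the sample keys are hypothetical:

```python
import re

pattern = r'^\[(\d)*\](.sub-pages.)\[(\d)*\](.url)$'  # one of the lock patterns above
print(bool(re.match(pattern, '[2].sub-pages.[0].url')))    # True  -> tagged "locked"
print(bool(re.match(pattern, '[2].sub-pages.[0].title')))  # False -> left translatable
```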
+KEY_REGEX_PATTERNS = ['^\[\d\](.sub-pages.)\[\d\](.url)$', '^\[\d\](.sub-pages.)\[\d\](.sub-pages.)\[\d\](.url)$',
+    '^\[\d\](.sub-pages.)\[\d\](.icon)$', '^(\[\d\])(.url)$', '^(\[\d\])(.icon)$', '^(\[\d\])(.category)$',
+    '^(\[\d\])(.tech.)(\[\d\])(.img)$', '^(\[\d\])(.tech.)(\[\d\])(.url)$',
+    '^(\[\d\])(.award.)(\[\d\])(.url)$', '^(\[\d\])(.award.)(\[\d\])(.img)$',
+    '^(\[\d\])(.media.)(\[\d\])(.img)$', '^(\[\d\])(.media.)(\[\d\])(.article)$',
+    '^(\[\d\])(.attachment)$', '^(\[\d\])(.expert.)(\[\d\])(.tweet)$',
+    '^(\[\d\])(.expert.)(\[\d\])(.avatar)$', '^(\[\d\])(.expert.)(\[\d\])(.img)$',
+    '^(\[\d\])(.htmlsection)$', '^(\[\d\])(.folder)$', 'redirect_from.\[\d\]',
+    '^(\[\d\])(.links.)(\[\d\])(.url)$', '^(\[\d\])(.links.)(\[\d\])(.id)$']
+
+KEY_PATTERNS = ['lang', 'layout', 'permalink', 'redirect_from']
+SOURCE_PATTERNS = ['* * * * *', '']
+# TODO use re2 by default?
+# examples for the first regex:
+# ![edit-button-mobile](/attachment/wiki/doc-edit/02-button1.png)
+# ![commit](/attachment/wiki/doc-gel/07-commit-msg.png)
+# for the second one a liquid include html line
+
+SOURCE_REGEX_PATTERNS = [
+    '^\!\[(\w{0,50}(-){0,50}(\.){0,2}\w{0,50}){0,10}\]\((\/(\w{0,50}(-){0,8}\w{0,50})){0,10}(\w{0,50}(-){0,50}\w{0,50}){0,10}.\w{0,10}\)',
+    '{%[^\S\n]{1,8}include[\s\w-]*\.html[^\S\n]{1,8}%}'
+    ]
+# start/end delimiter pairs that mark a whole source string as pure markup
+# (liquid tags and expressions, HTML tags and comments, markdown link labels)
+START_END_PATTERNS = {'{%': '%}', '{{': '}}', '<': '>', '[': ']', '<!--': '-->'}
+
+# tx resources keys
+STRING_HASH_KEY = 'string_hash'
+KEY_KEY = 'key'
+SOURCE_STRING_KEY = 'source_string'
+# markdown frontmatter keys
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+# comment and tag for tx source strings
+UPDATE_TAGS = '{"comment": "Added notranslate tag via curl", "tags": ["notranslate"]}'
+UPDATE_TAGS_LOCKED = '{"comment": "Added locked tags via curl", "tags": ["locked"]}'
+
+DATA_TO_URL_MAPPING = \
+{
+    'hcl': 'Strings are taken from this page: https://www.qubes-os.org/hcl/',
+    'download': 'Strings are taken from this page: https://www.qubes-os.org/downloads/',
+    'experts': 'Strings are taken from this page: https://www.qubes-os.org/experts/',
+    'home': 'Strings are taken from this page: https://www.qubes-os.org/',
+    'index': 'Strings are taken from this page: https://www.qubes-os.org/doc/',
+    'intro': 'Strings are taken from this page: https://www.qubes-os.org/intro/',
+    'partners': 'Strings are taken from this page: https://www.qubes-os.org/partners/',
+    'style_guide_content': 'Strings are taken from this page: https://www.qubes-os.org/doc/style-guide/',
+    'teamtexts': 'Strings are taken from this page: https://www.qubes-os.org/team/',
+    'videos': 'Strings are taken from this page: https://www.qubes-os.org/video-tours/',
+}
+
+# transifex schema
+TX_JSON_SCHEMA = {
+    "type": "array",
+    "items":
+    [
+        {
+            "type": "object",
+            "required": ["comment", "context", "key", "string_hash", "reviewed", "pluralized", "source_string", "translation"],
+            "properties":
+            {
+                "comment": {"type": "string"},
+                "context": {"type": "string"},
+                "key": {"type": "string"},
+                "string_hash": {"type": "string"},
+                "reviewed": {"type": "boolean"},
+                "pluralized": {"type": "boolean"},
+                "source_string": {"type": "string"},
+                "translation": {"type": "string"}
+            }
+        }
+    ]
+}
+
+tagged_locked = dict()
+
+basicConfig(level=DEBUG)
+logger = getLogger(__name__)
+LOG_FILE_NAME = '/tmp/tag_strings_as_locked.log'
+
+def configure_logging(logname):
+    handler = FileHandler(logname)
+    handler.setLevel(DEBUG)
+    formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+def check_reg(string, reg):
+    '''
+    check if the given string matches the given regular expression
+    string: string to check
+    reg: regular expression
+    return true if it is the case
+    '''
+    g = match(reg, string)
+    return g is not None
+
+def manual_break():
+    prompt = input("press c + hit enter to continue: ")
+    if prompt != 'c':
+        manual_break()
+
+def check_for_liquid_expression(item):
+    return any(item.startswith(start) and item.endswith(end) for start, end in START_END_PATTERNS.items())
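For orientation, a hypothetical per-string object as the strings endpoint returns it; the hash and values are made up, and the schema here is a condensed stand-in for TX_JSON_SCHEMA above:

```python
from jsonschema import validate

TX_ITEM_SCHEMA = {  # condensed from TX_JSON_SCHEMA above
    "type": "object",
    "required": ["key", "string_hash", "source_string"],
}

sample_item = {
    "comment": "",
    "context": "",
    "key": "[0].url",                       # matches a lock pattern above
    "string_hash": "d41d8cd98f00b204e980",  # made up
    "reviewed": False,
    "pluralized": False,
    "source_string": "/downloads/",
    "translation": "",
}
validate(sample_item, TX_ITEM_SCHEMA)  # raises ValidationError on a shape mismatch
```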
+def get_all_original_permalinks_and_redirects(sourcenamesfiles):
+    '''
+    get the permalinks and redirects from all the original to-be-translated files
+    sourcenamesfiles: a file containing all the original source file names from the tx config
+    return: a set containing the original (language code removed) permalinks and redirects
+    '''
+    sources = []
+    with iopen(sourcenamesfiles) as fp:
+        lines = fp.readlines()
+        sources = ['./' + x.split('source_file =')[1].strip() for x in lines if lines.index(x) % 2 == 1]
+
+    perms_and_redirects = set()
+    for filepath in sources:
+        logger.debug('reading %s' % filepath)
+        with iopen(filepath) as fp:
+            md = load(fp)
+        if md.get(PERMALINK_KEY) is not None:
+            perms_and_redirects.add(md.get(PERMALINK_KEY))
+        else:
+            logger.error('no permalink in frontmatter for file %s' % filepath)
+        redirects = md.get(REDIRECT_KEY)
+        if redirects is not None:
+            if isinstance(redirects, list):
+                for r in redirects:
+                    perms_and_redirects.add(r)
+            elif isinstance(redirects, str):
+                perms_and_redirects.add(redirects)
+            else:
+                logger.error('ERROR: unexpected value in redirect_from: %s' % filepath)
+                exit(1)
+        else:
+            logger.debug('no redirect_from in frontmatter for file %s' % filepath)
+    return perms_and_redirects
+
+
+def create_hash_and_tags_mapping(tx_resources, tx_api_token, debug, perms_and_redirects, manual):
+    '''
+    for every given file uploaded to transifex, get its resource strings, check if they need to be
+    marked as locked and save their hashes for further processing if this is the case
+    tx_resources: a list containing all the uploaded transifex resources
+    tx_api_token: transifex API token
+    debug: if true be verbose
+    manual: if true the resources are processed one by one under the supervision of the developer
+    return a dictionary mapping string hash to resource name
+    '''
+    hash_and_tags_mapping = dict()
+    for res in tx_resources:
+        url = 'https://www.transifex.com/api/2/project/qubes/resource/' + res + '/translation/en/strings/'
+        logger.info('will fetch url %s via curl' % url)
+        if manual:
+            manual_break()
+
+        buf = BytesIO()
+        c = Curl()
+        c.setopt(c.URL, url)
+        c.setopt(c.WRITEDATA, buf)
+        c.setopt(c.CAINFO, where())
+        c.setopt(c.USERPWD, 'api:' + tx_api_token)
+        c.setopt(c.FOLLOWLOCATION, True)
+        try:
+            c.perform()
+        except error:
+            logger.error("Pycurl: ", exc_info=True)
+            c.close()
+            exit(1)
+        if c.getinfo(HTTP_CODE) == 404:
+            logger.error("Following resource was not found in transifex: %s." % res)
+            logger.error("Response: %s", buf.getvalue())
+            c.close()
+            continue
+        if c.getinfo(HTTP_CODE) != 200:
+            logger.error("Following resource could not be fetched: %s." % res)
+            logger.error("Response: %s", buf.getvalue())
+            c.close()
+            continue
+        c.close()
+
+        body = loads(buf.getvalue())
+        try:
+            validate(body, TX_JSON_SCHEMA)
+        except ValidationError:
+            logger.error("SEVERE! invalid json input for res: %s, url: %s" % (res, url))
+            logger.error("ValidationError: ", exc_info=True)
+            exit(1)
+
+        if debug:
+            logger.debug("___________________________")
+            logger.debug("resource strings for file %s" % res)
+
+        if isinstance(body, list):
+            for item in body:
+                if isinstance(item, dict):
+                    if any(item[KEY_KEY] == k for k in KEY_PATTERNS) \
+                            or (item[SOURCE_STRING_KEY].startswith('![') and item[SOURCE_STRING_KEY].endswith('.png)')) \
+                            or any(check_reg(item[KEY_KEY], kr) for kr in KEY_REGEX_LOCK_PATTERNS) \
+                            or any(item[SOURCE_STRING_KEY] == s for s in SOURCE_PATTERNS) \
+                            or any(item[SOURCE_STRING_KEY] == s for s in perms_and_redirects) \
+                            or any(check_reg(item[SOURCE_STRING_KEY], sr) for sr in SOURCE_REGEX_PATTERNS) \
+                            or check_for_liquid_expression(item[SOURCE_STRING_KEY]):
+                        hash_and_tags_mapping.update({res + '.' + item[STRING_HASH_KEY]: res})
+                        to_debug = {res + "." + item[STRING_HASH_KEY]: [item[KEY_KEY], item[SOURCE_STRING_KEY]]}
+                        tagged_locked.update(to_debug)
+                        if debug:
+                            logger.debug("The following resource string will be tagged as 'locked' %s" % to_debug)
+                else:
+                    logger.error("got some unexpected data from transifex: %s" % item)
+                    exit(1)
+        else:
+            logger.error("got some unexpected data from transifex: %s" % body)
+            exit(1)
+
+    return hash_and_tags_mapping
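The mapping handed to tag_strings_as_locked() below keys each string hash by its resource; a sketch with made-up values:

```python
# "<resource>.<string_hash>" -> resource name; both values are hypothetical
hash_and_tags_mapping = {
    'doc_hcl.9a0364b9e99bb480dd25e1f0284c8555': 'doc_hcl',
}
for stringreshash, filename in hash_and_tags_mapping.items():
    stringhash = stringreshash.split('.')[1]
    print(filename, stringhash)  # doc_hcl 9a0364b9e99bb480dd25e1f0284c8555
```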
+def tag_strings_as_locked(hash_and_tag, tx_api_token, debug):
+    '''
+    upload locked tags for the given string hashes of the given files
+    hash_and_tag: dictionary containing {resource.string_hash: filename}
+    tx_api_token: transifex API token
+    debug: if true be verbose
+    '''
+    for stringreshash, filename in hash_and_tag.items():
+        res_hash = stringreshash.split('.')
+        stringhash = res_hash[1]
+        url = 'https://www.transifex.com/api/2/project/qubes/resource/' + filename + '/source/' + stringhash + '/'
+        logger.info('put tag %s via curl' % url)
+        buf = BytesIO()
+        c = Curl()
+        c.setopt(c.URL, url)
+        c.setopt(WRITEFUNCTION, buf.write)
+        c.setopt(c.CUSTOMREQUEST, 'PUT')
+        c.setopt(c.POST, 1)
+        c.setopt(c.USERPWD, 'api:' + tx_api_token)
+        c.setopt(c.FOLLOWLOCATION, True)
+
+        # for data files, point the translators to the page the strings come from
+        if any(filename.endswith(i) for i in DATA_TO_URL_MAPPING) and '_data_' in filename:
+            spl = filename.split('_data_')
+            comment = DATA_TO_URL_MAPPING[spl[len(spl) - 1]]
+            tags = '{"comment": "' + comment + '", "tags": ["locked"]}'
+            c.setopt(c.POSTFIELDS, tags)
+            c.setopt(c.HTTPHEADER, ['Content-Type: application/json',
+                                    'Content-Length: ' + str(len(tags))])
+        else:
+            c.setopt(c.POSTFIELDS, UPDATE_TAGS_LOCKED)
+            c.setopt(c.HTTPHEADER, ['Content-Type: application/json',
+                                    'Content-Length: ' + str(len(UPDATE_TAGS_LOCKED))])
+
+        try:
+            c.perform()
+        except error:
+            logger.error("Pycurl: ", exc_info=True)
+            c.close()
+            exit(1)
+        if c.getinfo(HTTP_CODE) == 404:
+            logger.error("String hash %s for file %s was not found and could not be tagged as 'locked'." % (stringhash, filename))
+            logger.error("Response: %s", buf.getvalue())
+            c.close()
+            continue
+        if c.getinfo(HTTP_CODE) != 200:
+            logger.error("String hash %s for file %s could not be tagged as 'locked'." % (stringhash, filename))
+            logger.error("Response: %s", buf.getvalue())
+            c.close()
+            continue
+        c.close()
+
+        if debug:
+            logger.debug('---------------------')
+            logger.debug(buf.getvalue())
+
+if __name__ == '__main__':
+    # python _utils/_translation_utils/tag_strings_as_locked.py /tmp/tx-resources-names.txt /tmp/tx-sources-filenames.txt --debug --manual
+    parser = ArgumentParser()
+    # the file from the tx configuration containing the resource names
+    parser.add_argument("tx_resourcenamesfile")
+    # the file from the tx configuration containing only the original source filenames
+    parser.add_argument("tx_sourcesnamesfile")
+    # whether or not to write debug json output
+    parser.add_argument("--debug", action='store_true')
+    # whether or not to tag file by file
+    parser.add_argument("--manual", action='store_true')
+
+    args = parser.parse_args()
+    configure_logging(LOG_FILE_NAME)
+
+    manual = args.manual
+
+    if not isfile(args.tx_resourcenamesfile):
+        print("please check your transifex resource names file")
+        logger.error("please check your transifex resource names file")
+        sys.exit(1)
+
+    if not isfile(args.tx_sourcesnamesfile):
+        print("please check your transifex original source names file")
+        logger.error("please check your transifex original source names file")
+        sys.exit(1)
+
+    tx_resources = []
+    with iopen(args.tx_resourcenamesfile) as f:
+        tx_resources = f.readlines()
+    tx_resources = [t.rstrip() for t in tx_resources]
+
+    if manual:
+        manual_break()
+
+    if 'TX_TOKEN' not in environ:
+        parser.error('TX_TOKEN variable not set')
+    tx_api_token = environ['TX_TOKEN']
+    perms_and_redirects = get_all_original_permalinks_and_redirects(args.tx_sourcesnamesfile)
+    hash_and_tags_mapping = create_hash_and_tags_mapping(tx_resources, tx_api_token, args.debug, perms_and_redirects, manual)
+
+    if args.debug:
+        logger.debug("------------------------------------------------")
+        logger.debug("------------HASH TO TAG LOCKED MAPPING----------")
+        logger.debug("------------------------------------------------")
+        logger.debug(dumps(hash_and_tags_mapping, indent=4))
+        logger.debug("------------------------------------------------")
+        logger.debug("--------------STRINGS TAGGED LOCKED-------------")
+        logger.debug("------------------------------------------------")
+        logger.debug(dumps(tagged_locked, indent=4))
+        logger.debug("------------------------------------------------")
+
+    tag_strings_as_locked(hash_and_tags_mapping, tx_api_token, args.debug)
diff --git a/_utils/_translation_utils/tx_config.sh b/_utils/_translation_utils/tx_config.sh
new file mode 100644
index 00000000..b028716b
--- /dev/null
+++ b/_utils/_translation_utils/tx_config.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# to be run from the git root
+
+set -e
+
+# add ref & lang attributes to newly created files
+python3 _utils/_translation_utils/prepare_for_translation.py en _doc/ _utils/_translation_utils/COUNTER.txt
+
+# because tx config apparently does not update an existing configuration, everything is done from
+# scratch every time: delete the current tx configuration
+mv .tx/config /tmp/tx_config_old || :
+
+# init a tx configuration
+tx init --skipsetup
+
+# map the files with tx config
+tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir _doc -i _dev --expression '_translated/<lang>/_doc/{filepath}/{filename}{extension}' --execute
+tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir pages --expression '_translated/<lang>/pages/{filepath}/{filename}{extension}' --execute
+tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir news --expression '_translated/<lang>/news/{filepath}/{filename}{extension}' --execute
+
+# HTML
+tx config mapping -r qubes._doc_introduction_intro --source-lang en --type HTML --source-file _doc/introduction/intro.md '_translated/<lang>/_doc/introduction/intro.md' --execute
+tx config mapping -r qubes.pages_partners --source-lang en --type HTML --source-file pages/partners.html '_translated/<lang>/pages/partners.html' --execute
+tx config mapping -r qubes.pages_home --source-lang en --type HTML --source-file pages/home.html '_translated/<lang>/pages/home.html' --execute
+
+tx config mapping -r qubes.data_architecture --source-lang en --type YAML_GENERIC --source-file _data/architecture.yml --expression '_translated/<lang>/_data/<lang>/architecture.yml' --execute
+tx config mapping -r qubes.data_doc_index --source-lang en --type YAML_GENERIC --source-file _data/doc-index.yml --expression '_translated/<lang>/_data/<lang>/doc-index.yml' --execute
+tx config mapping -r qubes.data_includes --source-lang en --type YAML_GENERIC --source-file _data/includes.yml --expression '_translated/<lang>/_data/<lang>/includes.yml' --execute
+tx config mapping -r qubes.data_team-page --source-lang en --type YAML_GENERIC --source-file _data/team-page.yml --expression '_translated/<lang>/_data/<lang>/team-page.yml' --execute
+tx config mapping -r qubes.data_team --source-lang en --type YAML_GENERIC --source-file _data/team.yml --expression '_translated/<lang>/_data/<lang>/team.yml' --execute
+tx config mapping -r qubes.data_experts --source-lang en --type YAML_GENERIC --source-file _data/experts.yml --expression '_translated/<lang>/_data/<lang>/experts.yml' --execute
+tx config mapping -r qubes.data_downloads_page --source-lang en --type YAML_GENERIC --source-file _data/downloads-page.yml --expression '_translated/<lang>/_data/<lang>/downloads-page.yml' --execute
+tx config mapping -r qubes.data_hcl --source-lang en --type YAML_GENERIC --source-file _data/hcl.yml --expression '_translated/<lang>/_data/<lang>/hcl.yml' --execute
+tx config mapping -r qubes.data_research --source-lang en --type YAML_GENERIC --source-file _data/research.yml --expression '_translated/<lang>/_data/<lang>/research.yml' --execute
+tx config mapping -r qubes.data_style_guide_page --source-lang en --type YAML_GENERIC --source-file _data/style-guide-page.yml --expression '_translated/<lang>/_data/<lang>/style-guide-page.yml' --execute
+
+crudini --del .tx/config qubes._doc_README
+crudini --del .tx/config qubes._doc_CONTRIBUTING
+
+sed -i 's/\._doc_/.doc_/' .tx/config
+
+echo "#####################################################################################"
+echo "##### Please pay attention to the changes made to the current tx config ############"
+echo "##### Do you have to delete some resources on transifex manually? ##################"
+echo "##### left is the current config, on the right is the old tx config ################"
+echo "#####################################################################################"
+diff /tmp/tx_config_old .tx/config --color || :
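Transifex expands the `<lang>` placeholder in these expressions per target language when pulling; a rough sketch of the resulting layout (the concrete paths are hypothetical):

```python
expression = '_translated/<lang>/_doc/{filepath}/{filename}{extension}'
for lang in ['de', 'fr', 'es']:
    print(expression.replace('<lang>', lang).format(
        filepath='introduction', filename='intro', extension='.md'))
# _translated/de/_doc/introduction/intro.md
# _translated/fr/_doc/introduction/intro.md
# _translated/es/_doc/introduction/intro.md
```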
diff --git a/_utils/_translation_utils/tx_pull.sh b/_utils/_translation_utils/tx_pull.sh
new file mode 100644
index 00000000..82061583
--- /dev/null
+++ b/_utils/_translation_utils/tx_pull.sh
@@ -0,0 +1,10 @@
+#!/bin/bash -e
+# the first argument is the language for which the translated files should be downloaded
+# (the "developer" mode would download all unreviewed translated strings as well)
+tx pull --force -l $1 --mode reviewed --traceback -r qubes.doc*
+tx pull --force -l $1 --mode reviewed --traceback -r qubes.pages*
+tx pull --force -l $1 --mode reviewed --traceback -r qubes.news*
+
+# a different mode is needed for the YAML files: the developer mode does not download source strings
+# if untranslated, and the reviewed mode would return empty strings for untranslated ones
+tx pull --force -l $1 --mode sourceastranslation --traceback -r qubes.data_*
diff --git a/_utils/transifex-pull b/_utils/transifex-pull
new file mode 100755
index 00000000..282262c6
--- /dev/null
+++ b/_utils/transifex-pull
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# Pull translated pages from Transifex
+
+if ! command -v tx; then
+    echo "transifex-client needs to be installed" >&2
+    exit 1
+fi
+
+if [ -z "$TX_TOKEN" ]; then
+    echo "Please set TX_TOKEN variable to Transifex API key" >&2
+    exit 1
+fi
+
+if [ -z "$1" ]; then
+    echo "Usage: $0 <lang> [<lang> ...]" >&2
+    exit 1
+fi
+
+set -e
+
+for lang in "$@"; do
+    bash _utils/_translation_utils/tx_pull.sh "$lang"
+    bash _utils/_translation_utils/post_transifex_pull.sh "$lang" _translated/"$lang"
+done
+
+bash _utils/_translation_utils/check_all_langs.sh _translated
+
+# switch to ssh for push
+git -C _translated remote set-url origin git@github.com:QubesOS/qubes-translated
+
+# commit and push
+git -C _translated add .
+
+# if nothing to commit, exit early
+[ -n "$(git -C _translated status --porcelain)" ] || exit 0
+
+git -C _translated commit -m 'Update translated content'
+git -C _translated push origin HEAD:master
diff --git a/_utils/transifex-push b/_utils/transifex-push
new file mode 100755
index 00000000..7d17980d
--- /dev/null
+++ b/_utils/transifex-push
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# This script uploads the website source files to Transifex for translation, including all the preparatory work
+
+if ! command -v tx; then
+    echo "transifex-client needs to be installed" >&2
+    exit 1
+fi
+
+if [ -z "$TX_TOKEN" ]; then
+    echo "Please set TX_TOKEN variable to Transifex API key" >&2
+    exit 1
+fi
+
+set -e
+
+# update config
+bash _utils/_translation_utils/tx_config.sh
+
+# push the sources
+tx push -s
+
+# update sources metadata (notranslate tags, locks etc)
+bash _utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh .tx/config /tmp/tx-resources-names.txt /tmp/tx-sources-filenames.txt
+python3 _utils/_translation_utils/tag_strings_as_locked.py /tmp/tx-resources-names.txt /tmp/tx-sources-filenames.txt --debug
diff --git a/_utils/update-translated b/_utils/update-translated
new file mode 100755
index 00000000..9164359c
--- /dev/null
+++ b/_utils/update-translated
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+set -e
+
+tmpbranch="new-$$"
+git -C _translated fetch origin master:"$tmpbranch"
+tmpdir=$(mktemp -d)
+trap 'rm -rf $tmpdir; git -C _translated branch -D $tmpbranch' EXIT
+git clone --shared "$PWD/_translated" -b "$tmpbranch" "$tmpdir/translated"
+
+if ! _utils/verify-translated "$tmpdir/translated"; then
+    echo "Translated content did not pass sanity check, not updating" >&2
+    # TODO: consider some louder alert? email? issue on github?
+    exit 1
+fi
+
+git -C _translated merge --ff-only "$tmpbranch"
+git add _translated
+
+git commit -m 'autoupdate: _translated'
+commit_id=$(git rev-parse HEAD)
+tag_name=auto_${commit_id:0:8}
+git tag -s -m "Automatic tag for commit $commit_id" "$tag_name"
+git push origin master $tag_name
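For reference, how the automatic tag name is derived from the commit; the commit id here is made up:

```python
commit_id = '1a2b3c4d5e6f7890abcdef1234567890abcdef12'
tag_name = 'auto_' + commit_id[:8]  # mirrors ${commit_id:0:8} in the script
print(tag_name)  # auto_1a2b3c4d
```

The signed tag (`git tag -s`) is what lets the website repository treat the otherwise unsigned `qubes-translated` content as validated: only content that passed `verify-translated` gets tagged and pushed.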
diff --git a/_utils/verify-translated b/_utils/verify-translated
new file mode 100755
index 00000000..0bfbc0a2
--- /dev/null
+++ b/_utils/verify-translated
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+# Simple verifier for qubes-translated repository content sanity.
+# This script looks at the frontmatter of each file and mostly checks that:
+# - the lang: key is present and matches the directory name
+# - the ref: key is present
+# - url-related parameters (redirect_from, permalink) are properly language-scoped
+# - no unexpected entries are present in the frontmatter
+#
+# Besides that, it verifies that no unexpected files are present.
+#
+# Usage:
+#   $0 <directory>
+# Supported env variables:
+# - TRANSLATED_LANGS - list of expected languages
+
+import argparse
+import os
+import string
+import yaml
+
+TRANSLATED_LANGS = ['de']
+if 'TRANSLATED_LANGS' in os.environ:
+    TRANSLATED_LANGS = os.environ['TRANSLATED_LANGS'].split()
+
+SAFE_PATH_CHARS = string.ascii_letters + string.digits + '/_-.'
+
+ALLOWED_FRONTMATTER_KEYS = ['title', 'lang', 'ref', 'permalink', 'redirect_from', 'redirect_to', 'layout', 'model']
+
+MANDATORY_FRONTMATTER_KEYS = ['title', 'lang', 'ref', 'layout']
+
+ALLOWED_EXTERNAL_REDIRECT_TARGETS = ['https://github.com/Qubes-Community/Contents/blob/master/']
+
+ALLOWED_LAYOUTS = ['doc', 'doc-index', 'doc-full', 'hcl', 'downloads', 'news', 'team', 'home', 'default', 'experts', 'sidebar']
+
+# TODO: consider allowing some layouts only on some pages (and consider their redirect_from too)
+
+parser = argparse.ArgumentParser()
+parser.add_argument('directory')
+
+class VerificationError(Exception):
+    def __init__(self, path, msg):
+        # sanitize the path before it ends up in the error message
+        safe_name = ''.join(l if l in SAFE_PATH_CHARS else '?' for l in path)
+        super().__init__('{}: {}'.format(safe_name, msg))
+
+def verify_readme(path):
+    with open(path) as f:
+        readme_text = f.read()
+
+    if '---' in readme_text:
+        raise VerificationError(path, 'may not contain frontmatter')
+    if '<' in readme_text:
+        raise VerificationError(path, 'may not contain HTML')
+    if '{' in readme_text:
+        raise VerificationError(path, 'may not contain liquid templates')
+
+
+def verify_md_file(lang, path):
+    with open(path) as f:
+        file_content = f.read()
+
+    # there must be frontmatter
+    if not file_content.startswith('---\n'):
+        raise VerificationError(path, 'missing frontmatter')
+
+    # better be more strict here - this may catch too much (if another separator is
+    # used), but then loading the yaml will detect multiple documents
+    frontmatter_text = file_content[4:].split('\n---\n')[0]
+    # there could be _just_ frontmatter too
+    if frontmatter_text.endswith('\n---'):
+        frontmatter_text = frontmatter_text[:-4]
+    try:
+        frontmatter = yaml.safe_load(frontmatter_text)
+    except Exception as e:
+        raise VerificationError(path, 'failed to parse frontmatter: {!s}'.format(e)) from e
+    verify_frontmatter(lang, path, frontmatter)
+
+
+def verify_frontmatter(lang, path, frontmatter):
+    # double check that all entries were verified
+    verified = []
+
+    url_prefix = '/{}/'.format(lang)
+    if any(key not in ALLOWED_FRONTMATTER_KEYS for key in frontmatter):
+        raise VerificationError(path, 'unexpected frontmatter key')
+
+    for key in MANDATORY_FRONTMATTER_KEYS:
+        if key not in frontmatter:
+            raise VerificationError(path, key + ' missing in frontmatter')
+
+    if lang != frontmatter['lang']:
+        raise VerificationError(path, 'lang mismatch')
+
+    verified.append('lang')
+
+    if not isinstance(frontmatter['ref'], int):
+        raise VerificationError(path, 'invalid ref format')
+
+    verified.append('ref')
+
+    if 'permalink' in frontmatter:
+        if not frontmatter['permalink'].startswith(url_prefix):
+            raise VerificationError(path, 'invalid permalink')
+
+        verified.append('permalink')
+
+    if 'redirect_from' in frontmatter:
+        if isinstance(frontmatter['redirect_from'], str):
+            redirect_from = [frontmatter['redirect_from']]
+        elif isinstance(frontmatter['redirect_from'], list):
+            redirect_from = frontmatter['redirect_from']
+        else:
+            raise VerificationError(path, 'invalid redirect_from format')
+
+        for url in redirect_from:
+            if '/..' in url:
+                raise VerificationError(path, '.. in url')
+            if not url.startswith(url_prefix):
+                raise VerificationError(path, 'invalid redirect_from')
+
+        verified.append('redirect_from')
+
+    if 'redirect_to' in frontmatter:
+        url = frontmatter['redirect_to']
+        if isinstance(url, list):
+            if len(url) != 1:
+                raise VerificationError(path, 'if redirect_to is a list, it must be 1-element')
+            url = url[0]
+        if not isinstance(url, str):
+            raise VerificationError(path, 'invalid redirect_to format')
+        if '/..' in url:
+            raise VerificationError(path, '.. in redirect_to')
+        if not any(url.startswith(prefix) for prefix in ALLOWED_EXTERNAL_REDIRECT_TARGETS + [url_prefix]):
+            raise VerificationError(path, 'forbidden redirect_to target')
+
+        verified.append('redirect_to')
+
+    if frontmatter['layout'] not in ALLOWED_LAYOUTS:
+        raise VerificationError(path, 'forbidden layout')
+
+    verified.append('layout')
+
+    title = frontmatter['title']
+    if not isinstance(title, str):
+        raise VerificationError(path, 'invalid title format')
+
+    # avoid HTML in title
+    if '<' in title or '%' in title:
+        raise VerificationError(path, 'invalid character in title')
+
+    verified.append('title')
+
+    # if 'model' is present, it must have the value 'all'
+    if 'model' in frontmatter:
+        if frontmatter['model'] != 'all':
+            raise VerificationError(path, 'invalid model value')
+        verified.append('model')
+
+    # intentionally compare lists, not sets, to catch duplicates too
+    if sorted(verified) != sorted(frontmatter):
+        raise VerificationError(path, 'BUG, some frontmatter entries were not verified')
+
+    # all is ok
+
+
+def verify_lang(lang, path):
+    for dirpath, dirnames, filenames in os.walk(path):
+        for filename in filenames:
+            filepath = os.path.join(dirpath, filename)
+            if any(c not in SAFE_PATH_CHARS for c in filename):
+                raise VerificationError(filepath, 'unsafe characters in filename')
+
+            if filename.endswith('.md'):
+                verify_md_file(lang, filepath)
+            elif filename.endswith('.html'):
+                # the frontmatter is expected to be the same
+                verify_md_file(lang, filepath)
+            elif filename.endswith('.yml'):
+                # these are loaded language-scoped anyway, so they can only mess up their own language
+                pass
+            else:
+                raise VerificationError(filepath, 'unexpected file type')
+
+
+def main():
+    args = parser.parse_args()
+
+    for lang in os.listdir(args.directory):
+        if lang == '.git':
+            pass
+        elif lang == 'README.md':
+            verify_readme(os.path.join(args.directory, lang))
+        elif lang in TRANSLATED_LANGS:
+            verify_lang(lang, os.path.join(args.directory, lang))
+        else:
+            raise VerificationError(lang, 'unexpected language dir')
+
+
+if __name__ == '__main__':
+    main()
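To close the loop, a frontmatter block that would pass verify_frontmatter above; the title, ref and urls are made up, and the snippet assumes it runs in the verifier's module so the function is in scope:

```python
import yaml

frontmatter = yaml.safe_load('''
title: Installationsanleitung
lang: de
ref: 131
layout: doc
permalink: /de/doc/installation-guide/
redirect_from:
  - /de/doc/install/
''')
verify_frontmatter('de', 'de/_doc/installation-guide.md', frontmatter)  # no exception raised
```

Locally, the whole check can be run the same way CI does, for example `TRANSLATED_LANGS="de fr es" _utils/verify-translated _translated`.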