From 757e8b989d6d1d036ffd3861848c4eafd62eb569 Mon Sep 17 00:00:00 2001 From: Maya Date: Sun, 28 Mar 2021 21:02:10 +0200 Subject: [PATCH 01/19] Add translation scripts Co-authored-by: Tobias Killer --- _utils/_translation_utils/COUNTER.txt | 1 + .../merge_md_heading_ids.py | 213 +++++++++ .../merge_md_heading_ids.rb | 304 +++++++++++++ .../_translation_utils/post_transifex_pull.sh | 48 ++ .../postprocess_htmlproofer.py | 313 +++++++++++++ .../postprocess_translation.py | 430 ++++++++++++++++++ .../postprocess_translation.sh | 33 ++ .../prepare_for_translation.py | 128 ++++++ .../prepare_tx_config_for_notranslate_tags.sh | 6 + .../prepare_tx_config_postprocess.sh | 7 + .../requirements_notranslate.txt | 10 + .../tag_strings_as_locked.py | 397 ++++++++++++++++ _utils/_translation_utils/tx_config.sh | 49 ++ _utils/_translation_utils/tx_pull.sh | 10 + 14 files changed, 1949 insertions(+) create mode 100644 _utils/_translation_utils/COUNTER.txt create mode 100644 _utils/_translation_utils/merge_md_heading_ids.py create mode 100644 _utils/_translation_utils/merge_md_heading_ids.rb create mode 100644 _utils/_translation_utils/post_transifex_pull.sh create mode 100644 _utils/_translation_utils/postprocess_htmlproofer.py create mode 100644 _utils/_translation_utils/postprocess_translation.py create mode 100644 _utils/_translation_utils/postprocess_translation.sh create mode 100644 _utils/_translation_utils/prepare_for_translation.py create mode 100755 _utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh create mode 100755 _utils/_translation_utils/prepare_tx_config_postprocess.sh create mode 100644 _utils/_translation_utils/requirements_notranslate.txt create mode 100644 _utils/_translation_utils/tag_strings_as_locked.py create mode 100644 _utils/_translation_utils/tx_config.sh create mode 100644 _utils/_translation_utils/tx_pull.sh diff --git a/_utils/_translation_utils/COUNTER.txt b/_utils/_translation_utils/COUNTER.txt new file mode 100644 index 
# This is a script provided by TokiDev
# https://github.com/tokideveloper/langswitch-prototype/blob/master/_utils/merge_md_heading_ids.py

# Opening of a rendered headline tag, e.g. ``<h2 id="``.
# NOTE(review): the pattern literals were empty in the reviewed text (markup
# appears to have been stripped); restored from kramdown's ``<hN id="...">``
# output format -- confirm against the upstream script.
HEADLINE_OPEN = '<h[1-6] id="'


def get_yaml_front_matter(gfm_lines):
    """Locate the YAML front matter delimited by ``---`` lines.

    Returns ``(front_matter_lines, start, end)`` where ``end`` is exclusive.
    An unterminated front matter extends to the end of the document; a
    document without any ``---`` line yields ``([], 0, 0)``.
    """
    start = None
    for i, line in enumerate(gfm_lines):
        if line != '---\n':
            continue
        if start is None:
            start = i
        else:
            return gfm_lines[start:i + 1], start, i + 1
    if start is not None:
        return gfm_lines[start:], start, len(gfm_lines)
    return [], 0, 0


def line_only_made_of(line, char):
    """Return True if *line* is a newline-terminated run of *char* only.

    An empty string returns False (the previous index-based check raised
    IndexError for it).
    """
    return line.endswith('\n') and all(c == char for c in line[:-1])


def render(gfm_lines):
    """Render the markdown lines with the ``kramdown`` executable.

    Returns the HTML as a list of lines (line endings kept), or None when
    kramdown exits with a non-zero status.
    """
    p = subprocess.run(['kramdown'], stdout=subprocess.PIPE,
                       input=''.join(gfm_lines), encoding='utf8')
    if p.returncode != 0:
        return None
    return p.stdout.splitlines(True)


def look_for_headline(rendered_html_lines, headline_id):
    """Find the headline tag carrying *headline_id*.

    Returns ``(line_index, column)`` of the first match, or None when there
    is no match or no rendered output (e.g. rendering failed).
    """
    if not rendered_html_lines:
        return None
    pattern = re.compile(HEADLINE_OPEN + re.escape(headline_id) + '">')
    for l, html_line in enumerate(rendered_html_lines):
        match = pattern.search(html_line)
        if match is not None:
            return l, match.start()
    return None


def extract_headline_id(rendered_html_lines, l, c):
    """Return the id of the headline tag starting at line *l*, column *c*.

    Returns None when the position is out of range, no headline tag starts
    exactly there, or the id attribute is not terminated.
    """
    if not rendered_html_lines or l >= len(rendered_html_lines):
        return None
    line = rendered_html_lines[l][c:]
    match = re.match(HEADLINE_OPEN, line)
    if match is None:
        return None
    line = line[match.end():]
    end = line.find('"')
    if end == -1:
        return None
    return line[:end]


def try_create_id(gfm_lines, line_number, this_line, next_line, rendered_html_lines, placeholder):
    """Return the rendered id of the headline at *line_number*, if it is one.

    The line is temporarily replaced by a headline made of *placeholder*,
    the document is re-rendered to find where that headline lands, and the
    id is then read from the same position of the original rendering.
    *gfm_lines* is restored before returning. Returns None when the line is
    not a headline.
    """
    saved_headline = gfm_lines[line_number]
    hl = None
    try:
        if this_line.startswith('#'):
            # headline starting with '#'
            gfm_lines[line_number] = '# ' + placeholder + '\n'
            hl = look_for_headline(render(gfm_lines), placeholder)
        elif len(next_line) >= 3 and (line_only_made_of(next_line, '=')
                                      or line_only_made_of(next_line, '-')):
            # headline underlined with '===' or '---'
            gfm_lines[line_number] = placeholder + '\n'
            hl = look_for_headline(render(gfm_lines), placeholder)
    finally:
        # revert headline even if rendering raised
        gfm_lines[line_number] = saved_headline

    if hl is None:
        return None
    hl_line, hl_col = hl
    return extract_headline_id(rendered_html_lines, hl_line, hl_col)


def generate_unique_placeholder(rendered_html_lines):
    """Return the first ``xq<N>z`` token that occurs in no rendered line."""
    number = 0
    while True:
        candidate = 'xq' + str(number) + 'z'
        if not any(candidate in line for line in rendered_html_lines):
            return candidate
        number += 1


def create_line_to_id_map(gfm_lines):
    """Map the index of every headline line to its rendered headline id.

    Returns an empty map when the document cannot be rendered.
    """
    result = {}
    gfm_lines2 = gfm_lines[:]
    rendered_html_lines = render(gfm_lines)
    if rendered_html_lines is None:
        return result

    placeholder = generate_unique_placeholder(rendered_html_lines)

    # line-by-line: assume a headline
    n = len(gfm_lines2)
    for i, this_line in enumerate(gfm_lines2):
        next_line = gfm_lines2[i + 1] if i < n - 1 else ''
        hid = try_create_id(gfm_lines2, i, this_line, next_line, rendered_html_lines, placeholder)
        if hid is not None:
            result[i] = hid
    return result


def insert_ids_to_gfm_file(line_to_id_map, gfm_lines):
    """Return a copy of *gfm_lines* with an anchor inserted after each headline.

    The anchor is prepended to the line following a ``#`` headline, or to
    the line following the underline of a ``===``/``---`` headline. The copy
    is padded with empty lines when the target index is past its end, so a
    translated body shorter than the original no longer raises IndexError.
    """
    result = gfm_lines[:]
    for key, value in line_to_id_map.items():
        # NOTE(review): literal restored, see HEADLINE_OPEN note above.
        str_to_insert = '<a id="' + value + '"></a>'
        line = result[key] if key < len(result) else ''
        target = key + 1 if line.startswith('#') else key + 2
        while len(result) <= target:
            result.append('')
        result[target] = str_to_insert + result[target]
    return result


def merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines):
    """Return the translated document with the original's headline ids added.

    Assumes both files match line by line, such that matching headlines are
    on the same body lines. The translated front matter is kept unchanged.
    """
    _, _, orig_end = get_yaml_front_matter(orig_gfm_lines)
    trl_yaml_front_matter, _, trl_end = get_yaml_front_matter(trl_gfm_lines)

    orig_body = orig_gfm_lines[orig_end:]
    trl_body = trl_gfm_lines[trl_end:]

    orig_line_to_id_map = create_line_to_id_map(orig_body)
    preresult = insert_ids_to_gfm_file(orig_line_to_id_map, trl_body)

    return ''.join(trl_yaml_front_matter) + ''.join(preresult)


def write_lines(content, filename):
    """Write the string *content* to *filename* as UTF-8."""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(content)


def read_lines(filename):
    """Return the lines of the UTF-8 file *filename*, line endings kept."""
    with open(filename, 'r', encoding='utf-8') as f:
        return f.readlines()


def process_headers(mapping):
    """Merge headline ids for every original -> translated pair in *mapping*.

    Pairs whose translated file name ends in ``.yml`` are skipped. Each
    translated file is rewritten in place.
    """
    for key, item in mapping.items():
        if item.endswith('.yml'):
            continue
        original_lines = read_lines(key)
        translated_lines = read_lines(item)
        print(key)
        write_lines(merge_ids_in_gfm_files(original_lines, translated_lines), item)
result.yaml_lines = gfm_lines[startl..(endl - 1)] + result.startl = startl + result.endl = endl + return result + end + end + end + if counter == 1 + result = YamlFrontMatter.new + result.yaml_lines = gfm_lines[startl..-1] + result.startl = startl + result.endl = gfm_lines.length + return result + end + # case counter == 0: + result = YamlFrontMatter.new + result.yaml_lines = [] + result.startl = 0 + result.endl = 0 + return result +end + + + +def line_only_made_of(line, char) + length = line.length + for i in 0..(length - 2) + if line[i] != char + return false + end + end + return line[length - 1] == "\n" +end + + + +def render(gfm_lines) + Kramdown::Document.new(gfm_lines.join).to_html.lines +end + + + +LineColumn = Struct.new(:l, :c) + +def look_for_headline(rendered_html_lines, headline_id) + for l in 0..(rendered_html_lines.length - 1) + m = rendered_html_lines[l].scan(//) + if m.length > 0 + c = rendered_html_lines[l].index(m[0]) + result = LineColumn.new + result.l = l + result.c = c + return result + end + end + return nil +end + + + +def extract_headline_id(rendered_html_lines, l, c) + line = rendered_html_lines[l] + line = line[c..-1] + m = line.scan(/= 3 and (line_only_made_of(next_line, '=') or line_only_made_of(next_line, '-')) + # headline starting with '===' or '---' + gfm_lines[line_number] = placeholder + "\n" + hl = look_for_headline(render(gfm_lines), placeholder) + end + + # revert headline + gfm_lines[line_number] = saved_headline + + if hl == nil + return nil + end + + return extract_headline_id(rendered_html_lines, hl.l, hl.c) +end + + + +def generate_unique_placeholder(rendered_html_lines) + number = 0 + prefix = 'xq' + suffix = 'z' + result = '' + while true do + result = prefix + number.to_s + suffix + solution_found = true + for line in rendered_html_lines + if line.include? 
result + number += 1 + solution_found = false + break + end + end + if solution_found + break + end + end + # we assume that there will be at least one solution + return result +end + + + +def create_line_to_id_map(gfm_lines) + result = {} + gfm_lines2 = gfm_lines[0..-1] + rendered_html_lines = render(gfm_lines) + + placeholder = generate_unique_placeholder(rendered_html_lines) + + # line-by-line: assume a headline + n = gfm_lines2.length + for i in 0..(n - 1) + this_line = gfm_lines2[i] + next_line = '' + if i < n - 1 + next_line = gfm_lines2[i + 1] + end + hid = try_create_id(gfm_lines2, i, this_line, next_line, rendered_html_lines, placeholder) + if hid != nil + result[i] = hid + end + end + return result +end + + + +def insert_ids_to_gfm_file(line_to_id_map, gfm_lines) + result = gfm_lines[0..-1] + n = result.length + line_to_id_map.each do |key, value| + str_to_insert = '' + line = result[key] + if !line.nil? and line.start_with?('#') + if key + 1 >= n + result = result + [''] + end + result[key + 1] = str_to_insert.to_s + result[key + 1].to_s + else + if key + 2 >= n + result = result + [''] + end + result[key + 2] = str_to_insert.to_s + result[key + 2].to_s + end + end + return result +end + + + +def merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines) + # assuming that both files match line by line such that matching headlines are in the same lines + + # get yaml front matter from orig + orig_yfm = get_yaml_front_matter(orig_gfm_lines) + orig_yaml_front_matter = orig_yfm.yaml_lines + orig_start = orig_yfm.startl + orig_end = orig_yfm.endl + + # get yaml front matter from trl + trl_yfm = get_yaml_front_matter(trl_gfm_lines) + trl_yaml_front_matter = trl_yfm.yaml_lines + trl_start = trl_yfm.startl + trl_end = trl_yfm.endl + + # get body from trl + trl_body = trl_gfm_lines[trl_end..-1] + + # get body from orig + orig_body = orig_gfm_lines[orig_end..-1] + + # create line-to-id map + orig_line_to_id_map = create_line_to_id_map(orig_body) + + # insert ids + 
preresult = insert_ids_to_gfm_file(orig_line_to_id_map, trl_body) + + # create translated document with adapted body + result_trl_gfm = trl_yaml_front_matter.join + preresult.join + + return result_trl_gfm +end + +def create_dict_from_tx_config(lang, mappingfile) + # read a tx.xonfig file containing only file_filter and source_file information store it in a dict and give it back + # mappingfile: a tx.xonfig file containing only file_filter and source_file information + # return: a dict containing a mapping between an original file and its downloaded tx translation + mapping = {} + + lines = [] + lines = read_file(mappingfile) + + translated = [] + source = [] + n = lines.length + idx = 0 + while idx < n do + t = lines[idx].split('file_filter =')[1].strip + s = lines[idx+1].split('source_file =')[1].strip + translated += ["./" + t.gsub("", lang)] + if idx >= n then + break + end + source += ["./" + s] + idx += 2 + end + + n = translated.length + idx = 0 + while idx < n do + mapping[source[idx]] = translated[idx] + idx += 1 + end + + return mapping +end + +def read_file(filename) + read_lines = [] + File.open(filename, "r") do |f| + f.each_line do |line| + read_lines += [line] + end + end + return read_lines +end + +def write_file(contents, filename) + read_lines = [] + File.open(filename, "w") do |f| + f.write(contents) + end +end + +def main() + if ARGV.length != 2 + exit(1) + end + + mapping = create_dict_from_tx_config(ARGV[0], ARGV[1]) + mapping.each do |key, value| + if !key.end_with?(".yml") + orig_gfm_lines = read_file(key) + trl_gfm_lines = read_file(value) + # merge ids in gfm files + result = merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines) + write_file(result, value) + end + end + +end + + +if __FILE__ == $0 + main() +end + diff --git a/_utils/_translation_utils/post_transifex_pull.sh b/_utils/_translation_utils/post_transifex_pull.sh new file mode 100644 index 00000000..d5b15759 --- /dev/null +++ b/_utils/_translation_utils/post_transifex_pull.sh @@ 
-0,0 +1,48 @@ +#!/bin/bash +# to be run from the git root +# $1 is lang +# $2 is directory where translated files reside and language needs to be added to internal urls +# TODO param check + +echo "============================ post processing step 1 ======================================" +#read b +bash _utils/_translation_utils/prepare_tx_config_postprocess.sh .tx/config /tmp/tx-mapping + + +echo "============================ post processing step 2 ======================================" +#read b +ruby _utils/_translation_utils/merge_md_heading_ids.rb $1 /tmp/tx-mapping + +echo "============================ post processing step 3 press to cont ======================================" +#read b +python3 _utils/_translation_utils/postprocess_translation.py $1 $2 /tmp/tx-mapping /tmp/translated_href_urls.txt --yml + + +echo "============================ post processing step 4 press to cont ======================================" +#read b +bash _utils/_translation_utils/postprocess_translation.sh $1 $2 /tmp/translated_href_urls.txt + + +echo "================================= build suite ==================================" +#read b +bundle exec jekyll b + +echo "================================= run htmlproofer ===============================" +htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore ./_site/video-tours/index.html,./_site/$1/video-tours/index.html --url-ignore "/qubes-issues/" --log-level debug 2&> /tmp/html.output + +echo "================================== as a last resort in case of errors process html proofer errors =================================" +python3 _utils/_translation_utils/postprocess_htmlproofer.py $1 /tmp/html.output $2 + +echo "================================= build the site and run htmlproofer ====================================" +rm -rf ./_site/ +bundle exec jekyll b +htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore 
SLASH = '/'
# markdown frontmatter keys
PERMALINK_KEY = 'permalink'
REDIRECT_KEY = 'redirect_from'
TRANSLATED_LANGS = ['de']
URL_KEY = 'url'


basicConfig(level=DEBUG)
logger = getLogger(__name__)
LOG_FILE_NAME = '/tmp/postprocess_htmlproofer.log'


def configure_logging(logname):
    """Attach a DEBUG-level file handler writing to *logname* to the logger."""
    handler = FileHandler(logname)
    handler.setLevel(DEBUG)
    formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)


def log_debug(name, data):
    """Log *data* at DEBUG level under a banner titled *name*.

    Dictionaries are logged as indented JSON, anything else as is.
    """
    logger.debug('############################################')
    logger.debug('############################################')
    logger.debug('###\t' + name.capitalize() + '\t###')
    logger.debug('--------------------------------------------')
    if isinstance(data, dict):
        logger.debug(dumps(data, indent=4))
    else:
        logger.debug(data)
    logger.debug('############################################')
    logger.debug('############################################')


def get_new_line(line, internal_link, internal_links, permalink):
    """Return *line* with one erroneous internal link repaired.

    A link from *internal_links* that starts with ``/`` loses its ``#anchor``
    part; one that starts with ``#`` is replaced by *permalink*. Any other
    link, or a ``/`` link without an anchor, leaves the line unchanged
    (previously a missing ``#`` cut off the link's last character).
    """
    if internal_link not in internal_links:
        return line
    if internal_link.startswith('/'):
        anchor_at = internal_link.find('#')
        if anchor_at == -1:
            return line
        return line.replace(internal_link, internal_link[:anchor_at])
    if internal_link.startswith('#'):
        return line.replace(internal_link, permalink)
    return line


def _inline_link_targets(line):
    """Return the targets of all inline ``[text](target)`` links in *line*."""
    targets = []
    pos = 0
    while True:
        start = line.find('](', pos)
        if start == -1:
            break
        end = line.find(')', start + 2)
        if end == -1:
            break
        targets.append(line[start + 2:end].strip())
        pos = end + 1
    return targets


def _fix_markdown_line(line, internal_links, permalink):
    """Repair the erroneous internal links of a single markdown line."""
    # reference-style definition: "[label]: target"
    if line.startswith('[') and ']:' in line:
        target = line[line.find(']:') + 2:].strip()
        if target in internal_links and target.startswith(('/', '#')):
            return get_new_line(line, target, internal_links, permalink)
    # inline links; targets are collected up front because the line changes
    # length while it is being repaired
    if '](' in line and ')' in line:
        for target in _inline_link_targets(line):
            line = get_new_line(line, target, internal_links, permalink)
    return line


def process_markdown(translated_file, internal_links):
    """Discard the erroneous internal links of one translated markdown file.

    translated_file: file downloaded from transifex; if it does not exist
        this is logged at debug level and nothing is written
    internal_links: all internal links of translated_file that are
        erroneous according to htmlproofer
    """
    try:
        with iopen(translated_file) as t:
            mdt = load(t)
    except FileNotFoundError as e:
        logger.debug('Following file was not updated/downloaded from transifex: %s' % e.filename)
        return

    permalink = mdt.get(PERMALINK_KEY)
    if permalink is None:
        permalink = '/'

    lines = [_fix_markdown_line(line, internal_links, permalink)
             for line in mdt.content.splitlines()]
    mdt.content = linesep.join(lines) + '\n'

    with iopen(translated_file, 'wb') as replaced:
        dump(mdt, replaced)


def get_all_translated_permalinks_and_redirects_to_file_mapping(translated_dir, lang):
    """Collect permalinks and redirects of every translated file.

    Walks *translated_dir*, skipping hidden files and hidden subdirectories
    (the start directory itself may be given as ``./dir``).
    translated_dir: root directory with the translated files of a language
    lang: the specific language (currently unused)
    return: ``(mapping, yml_files)`` -- mapping from file path to the list of
        its permalink and redirect_from entries, and the list of ``.yml``
        files that carry no permalink
    """
    mapping = {}
    yml_files = []
    for dirname, subdirlist, filelist in walk(translated_dir):
        # prune in place so that walk() does not descend into hidden dirs
        subdirlist[:] = [d for d in subdirlist if not d.startswith('.')]
        for filename in filelist:
            if filename.startswith('.'):
                continue
            filepath = dirname + SLASH + filename
            with iopen(filepath) as fp:
                md = load(fp)

            perms = []
            permalink = md.get(PERMALINK_KEY)
            if permalink is not None:
                perms.append(permalink)
            elif filepath.endswith('.yml'):
                yml_files.append(filepath)
            else:
                logger.error('no permalink in frontmatter for file %s' % filename)

            redirects = md.get(REDIRECT_KEY)
            if redirects is None:
                logger.debug('no redirect_from in frontmatter for file %s' % filepath)
            elif isinstance(redirects, list):
                perms.extend(redirects)
            elif isinstance(redirects, str):
                perms.append(redirects)
            else:
                logger.error('ERRROR: unexpected in redirect_from: %s' % redirects)
                exit(1)
            mapping[filepath] = perms
    return mapping, yml_files


def _extract_href(line, pattern):
    """Return the double-quoted value following *pattern* in *line*, or None."""
    quote_open = line.find('"', line.find(pattern) + len(pattern))
    if quote_open == -1:
        return None
    quote_close = line.find('"', quote_open + 1)
    if quote_close == -1:
        return None
    return line[quote_open + 1:quote_close]


def get_error_output_from_htmlproofer(htmlproofer_output):
    """Parse the htmlproofer log into a ``{page url: [broken links]}`` dict.

    A line containing ``./_site...index.html`` starts a new page; every
    following line containing ``a href=`` contributes one link to that page.
    Pages without links are omitted, so an error-free log yields ``{}``.
    Links that appear before any page line, or after a ``./_site`` line that
    is not an ``index.html`` page, are dropped.
    """
    skipped_prefixes = ('Checking', 'Ran', 'Running', '\n', 'htmlproofer')
    pattern = 'a href='
    errors = {}
    current_url = None
    with iopen(htmlproofer_output, 'r') as h:
        for line in h:
            if line.startswith(skipped_prefixes):
                continue
            if pattern in line:
                link = _extract_href(line, pattern)
                if link is not None and current_url is not None:
                    errors.setdefault(current_url, []).append(link)
            if './_site' in line:
                match = search(r'\./_site(.*)index\.html', line)
                if match is None:
                    logger.debug('cannot derive a permalink from: %s' % line.strip())
                    current_url = None
                else:
                    current_url = match.group(1)
    return errors


def replace_url(to_replace, errorlinks):
    """Recursively remove the ``#anchor`` from dead-end URLs of a yaml tree.

    to_replace: the translated yaml content as a dictionary; list values are
        traversed recursively, non-dictionaries are ignored
    errorlinks: internal links that are dead ends and need to be cut off
        before ``#``
    """
    if not isinstance(to_replace, dict):
        return
    for k_r, v_r in to_replace.items():
        if isinstance(v_r, list):
            for i in v_r:
                replace_url(i, errorlinks)
        elif k_r == URL_KEY and isinstance(v_r, str) and '#' in v_r and v_r in errorlinks:
            to_replace[URL_KEY] = v_r[:v_r.find('#')]


def process_yml(translated, errorlinks):
    """Cut the anchors of dead-end URLs in one translated yml file in place.

    translated: translated yml file; a missing file is logged at debug level
    errorlinks: internal links that are dead ends and need to be cut off
        before ``#``
    Exits with status 1 if the file is empty.
    """
    try:
        with iopen(translated) as tp:
            docs = yaml.safe_load(tp)
    except FileNotFoundError as e:
        logger.debug('Following file was NOT updated/downloaded from transifex: %s' % e.filename)
        return

    if docs is None:
        logger.error("Empty translated file %s" % translated)
        exit(1)
    for a in docs:
        replace_url(a, errorlinks)

    if len(docs) > 0:
        with iopen(translated, 'w') as replace:
            yaml.dump(docs, replace, sort_keys=False)


def main():
    """Command line entry point.

    python _utils/_translation_utils/postprocess_htmlproofer.py de /tmp/html.output _qubes-translated/de/
    """
    parser = ArgumentParser()
    # for which language should we do this
    parser.add_argument("language")
    # the file containing the output of htmlproofer
    parser.add_argument("htmlproofer_output")
    # the directory containing the translated (downloaded via tx pull) files
    parser.add_argument("translated_dir")
    args = parser.parse_args()

    configure_logging(LOG_FILE_NAME)

    if not isdir(args.translated_dir):
        print("please check your translated directory")
        logger.error("please check your translated directory")
        sys.exit(1)

    if args.language not in TRANSLATED_LANGS:
        print("language not in the expected translation languages")
        logger.error("please check your translation language")
        sys.exit(1)

    if not isfile(args.htmlproofer_output):
        print("please check your html proofer output file")
        logger.error("please check your html proofer output file")
        sys.exit(1)

    errors = get_error_output_from_htmlproofer(args.htmlproofer_output)

    if not errors:
        # a clean htmlproofer run is a success, not a failure
        print("nothing to do, no errors to postprocess")
        sys.exit(0)

    logger.debug("------------------------------------------------")
    logger.debug("------------------------------------------------")
    logger.debug("------------------------------------------------")
    logger.debug("-------------STRINGS TAGGED NOTRANSLATE---------")
    logger.debug("------------------------------------------------")
    logger.debug("------------------------------------------------")

    error_links = sorted({el for val in errors.values() for el in val})
    log_debug("HTML ERRORS", errors)
    log_debug("HTML ERRORS", error_links)
    logger.debug("------------------------------------------------")
    logger.debug("------------------------------------------------")
    logger.debug("------------------------------------------------")

    mapping, yml_files = get_all_translated_permalinks_and_redirects_to_file_mapping(
        args.translated_dir, args.language)

    log_debug('mapping ', mapping)
    log_debug('yml files ', yml_files)

    # a file can be reached through its permalink and through redirects, so
    # the links of all matching pages are merged rather than overwritten
    file_to_internal_links = {}
    for filepath, perms in mapping.items():
        for url, links in errors.items():
            if url in perms:
                file_to_internal_links.setdefault(filepath, []).extend(links)

    log_debug(" file to internal links mapping", file_to_internal_links)
    for filepath, links in file_to_internal_links.items():
        process_markdown(filepath, links)

    # traverse all yml data files and cut the translated urls if they are in error_links
    for yml in yml_files:
        process_yml(yml, error_links)


if __name__ == '__main__':
    main()
# (param4, continued) it is afterwards used by postprocess_translation.sh script
#param5 is optional indicating .yml files to be processed as in _data directory with no frontmatter whatsoever

patterns = (
    "](/",
    "]: /",
    "href=\"/",
    "url: /",
    "href=\'/",
)
# TODO simplify the if condition with a list of omitted url patterns
news = "/news/"
qubes_issues = "/qubes-issues/"
# constants and such
# yml keys:
YML_KEYS = ['url', 'topic', 'title', 'category', 'folder', 'htmlsection', 'tweet', 'avatar', 'img',
            'article', 'quote', 'name', 'occupation', 'author', 'more', 'text',
            'video', 'intro', 'version', 'subtitle', 'download', 'security', 'bug', 'help',
            'join', 'partner', 'cert', 'picture', 'email', 'website', 'mail', 'links', 'id',
            'paragraph', 'snippet', 'column', 'hover', 'digest', 'signature', 'pgp', 'green', 'red', 'blue', 'trump',
            'tts1', 'tts2', 'txp', 'txaq', 'pxaq', 'column1', 'column2', 'column3', 'yes_short', 'no_short', 'no_extended', 'tba',
            'bold', 'item', 'note', 'section', 'row', 'r_version',
            'go', 'search', 'metatopic', 'ddg', 'hover']
URL_KEY = 'url'
# md frontmatterkeys:
PERMALINK_KEY = 'permalink'
REDIRECT_KEY = 'redirect_from'
LANG_KEY = 'lang'
TRANSLATED_KEY = 'translated'
LAYOUT_KEY = 'layout'
SLASH = '/'
MD_URL_SPLIT_PATTERNS = ['/)', '/#']
TRANSLATED_LANGS = ['de']
#EXCLUDE_FILES = ['download.md' ]


basicConfig(level=DEBUG)
logger = getLogger(__name__)
LOG_FILENAME = '/tmp/postprocess_translation.log'


def configure_logging(logname):
    """Attach a DEBUG-level file handler writing to *logname* to the logger."""
    handler = FileHandler(logname)
    handler.setLevel(DEBUG)
    formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)


def log_debug(name, data):
    """Log *data* at DEBUG level under a banner titled *name*.

    Dictionaries are logged as indented JSON, anything else as is.
    """
    logger.debug('############################################')
    logger.debug('############################################')
    logger.debug('###\t' + name.capitalize() + '\t###')
    logger.debug('--------------------------------------------')
    if isinstance(data, dict):
        logger.debug(dumps(data, indent=4))
    else:
        logger.debug(data)
    logger.debug('############################################')
    logger.debug('############################################')


def write_to_file(filename, lines):
    """
    write the given data structure to a file, one item per line
    filename: the name of the file to be written to (it is replaced)
    lines: the content; every item is converted with str()
    """
    with iopen(filename, 'w') as c:
        c.write('\n'.join(str(line) for line in lines))


def _localize_redirects(redirects, lang, source_file):
    """Return the redirect_from entries prefixed with ``/<lang>``.

    A ``/en/`` segment is collapsed to ``/``; entries that already start
    with ``/<lang>/`` are kept. Exits with status 1 if an entry contains
    ``..``.
    """
    if isinstance(redirects, str):
        redirects = [redirects]
    # just in case
    if any('..' in elem for elem in redirects):
        logger.error('\'..\' found in redirect_from in file %s' % source_file)
        exit(1)
    lang_prefix = SLASH + lang + SLASH
    return [elem if elem.startswith(lang_prefix) else SLASH + lang + elem.replace('/en/', SLASH)
            for elem in redirects]


def _localize_line(line, lang, permalinks):
    """Return *line* with ``<lang>/`` added to the internal links of translated pages."""
    # TODO we can translate news you know
    untouched_prefixes = (lang + SLASH, 'news', 'attachment', 'qubes-issues')
    for pattern in patterns:
        if pattern not in line:
            continue
        parts = line.split(pattern)
        line = parts[0]
        for part in parts[1:]:
            if '../' in part:
                logger.error('\'..\' found in internal url: %s' % part)
                exit(1)
            if not part.startswith(untouched_prefixes) and split_and_check(part, permalinks):
                line += pattern + lang + SLASH + part
            # TODO this is the case with links at the bottom of the file
            elif not part.startswith(SLASH) and SLASH + part in permalinks:
                line += pattern + lang + SLASH + part
            else:
                line += pattern + part
    return line


def process_markdown(source_file, translated_file, permalinks, lang):
    """
    take over permalink, redirect_from and layout from the original file's frontmatter
    (permalink and redirects prefixed with the language), set the language, and add the
    language to every internal url that points to a translated file
    source_file: original file
    translated_file: file downloaded from transifex; it is rewritten in place, if either
        file is missing this is logged at debug level and nothing is written
    permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
    lang: the translation language
    """
    try:
        with iopen(source_file) as s, iopen(translated_file) as t:
            mds = frontmatter.load(s)
            mdt = frontmatter.load(t)
    except FileNotFoundError as e:
        logger.debug('Following file was not updated/downloaded from transifex: %s' % e.filename)
        return

    source_permalink = mds.get(PERMALINK_KEY)
    if source_permalink is not None:
        mdt[PERMALINK_KEY] = SLASH + lang + source_permalink

    redirects = mds.get(REDIRECT_KEY)
    if redirects is not None:
        localized = _localize_redirects(redirects, lang, source_file)
        # a page must not redirect to itself; dict.fromkeys() removes
        # duplicates while keeping the order stable between runs
        own_urls = {source_permalink, mdt.get(PERMALINK_KEY)}
        mdt[REDIRECT_KEY] = [r for r in dict.fromkeys(localized) if r not in own_urls]

    if mds.get(LAYOUT_KEY) is not None:
        mdt[LAYOUT_KEY] = mds[LAYOUT_KEY]

    mdt[LANG_KEY] = lang
    # TODO we do not need the translated key anymore

    # replace links
    lines = [_localize_line(line, lang, permalinks) for line in mdt.content.splitlines()]
    mdt.content = linesep.join(lines) + '\n'

    with iopen(translated_file, 'wb') as replaced:
        frontmatter.dump(mdt, replaced)


def split_and_check(md_line, permalinks):
    """
    for the remainder of a markdown line that starts with an internal link
    return if the internal link belongs to a file already downloaded and translated from transifex
    md_line: text following the link pattern, starting with the internal link
    permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
    """
    for pattern in MD_URL_SPLIT_PATTERNS:
        if pattern not in md_line:
            continue
        # normalise the link to the '/path/' form used by permalinks
        t = md_line.split(pattern)[0]
        if not t.startswith(SLASH):
            t = SLASH + t
        if not t.endswith(SLASH):
            t = t + SLASH
        if t in permalinks:
            return True
        logger.debug("Following link: %s belongs to a file NOT translated/downloaded from transifex" % t)
    return False


def check_yml_attributes(to_replace, original):
    """
    recursively check that the attributes listed in YML_KEYS are not empty strings in the
    translated yaml file; if they are, replace them with the original content
    it assumes that the order between original and translated files loaded as dictionary is
    preserved and exits with status 1 if the keys do not line up
    to_replace: the translated yaml content as a dictionary
    original: the original yaml content as a dictionary
    """
    if not (isinstance(to_replace, dict) and isinstance(original, dict)):
        return
    for (k_r, v_r), (k_o, v_o) in zip(to_replace.items(), original.items()):
        if k_r != k_o:
            logger.error("ERROR, ordered of the loaded yml file is not preserved %s:%s" % (k_r, k_o))
            exit(1)
        if isinstance(v_r, list) and isinstance(v_o, list):
            for i, j in zip(v_r, v_o):
                check_yml_attributes(i, j)
        if k_r in YML_KEYS and v_r == '':
            to_replace[k_r] = v_o
def replace_url(to_replace, original, lang, permalinks):
    """
    recursively add the language to the original value of the key URL if the file with the given URL is translated
    and save it to the translated yaml content. if the file is not translated keep the original url
    it assumes that the order between original and translated files loaded as dictionary is preserved
    to_replace: the translated yaml content as a dictionary (modified in place)
    original: the original yaml content as a dictionary
    lang: language, for example de
    permalinks: urls of the translated/downloaded files from transifex
    anything that is not a pair of dictionaries is silently ignored
    the script is terminated with exit status 1 if the keys of both dictionaries do not line up
    """
    if not (isinstance(to_replace, dict) and isinstance(original, dict)):
        return
    for (k_r, v_r), (k_o, v_o) in zip(to_replace.items(), original.items()):
        if isinstance(v_r, list) and isinstance(v_o, list):
            for i, j in zip(v_r, v_o):
                replace_url(i, j, lang, permalinks)
        elif URL_KEY == k_r and URL_KEY == k_o:
            val = original[k_r]
            # an anchor is not part of a permalink: look the url up without it,
            # but keep the anchor in the value that is written back
            page = val[:val.find('#')] if val is not None and '#' in val else val
            to_replace[URL_KEY] = SLASH + lang + val if page in permalinks else val
        elif k_r != k_o:
            # the keys are handed over as logging arguments: string concatenation
            # raised a TypeError for non-string yaml keys and hid the real problem
            logger.error("ERROR, ordered of the loaded yml file is not preserved %s:%s", k_r, k_o)
            exit(1)
def get_all_the_hrefs(translated_dir):
    """
    traverse the already updated (via tx pull) root directory with all the translated files for a specific language
    and get all the urls that are embedded in html code in an href attribute
    hidden files and hidden sub directories (name starting with a dot) are not read
    translated_dir: root directory with all the translated files for a specific language
    return: set holding every value found between href=" and the next double quote
    the script is terminated with exit status 1 if a listed file cannot be opened
    """
    from os.path import join

    href = set()
    reg = r'(?<=href=").*?(?=")'
    for dirname, subdirlist, filelist in walk(translated_dir):
        # prune hidden sub directories in place so that walk() never descends into them.
        # the root itself is always read, even when it is given as './some/dir'
        subdirlist[:] = [d for d in subdirlist if not d.startswith('.')]
        for filename in filelist:
            if filename.startswith('.'):
                continue
            filepath = join(dirname, filename)
            try:
                with iopen(filepath) as fp:
                    for line in fp:
                        href.update(findall(reg, line))
            except FileNotFoundError as e:
                logger.error('problem opening a file in the translated dir: %s' % e.filename)
                exit(1)
    return href
def create_dict_from_tx_config(mappingfile, lang):
    """
    read a tx config file reduced to its file_filter and source_file lines, store it in a dict and give it back
    mappingfile: a tx config file containing only file_filter and source_file information,
                 every file_filter line is followed by the source_file line of the same resource
    lang: the language that replaces the <lang> placeholder of the file_filter
    return: a dict containing a mapping between an original file and its downloaded tx translation
    """
    mapping = {}
    translated = None
    with iopen(mappingfile) as fp:
        for line in fp:
            # the lines are told apart by their content and not by their position:
            # lines.index(x) is quadratic and returns the wrong position for repeated lines
            if 'file_filter =' in line:
                translated = './' + line.split('file_filter =')[1].strip().replace('<lang>', lang)
            elif 'source_file =' in line and translated is not None:
                mapping['./' + line.split('source_file =')[1].strip()] = translated
                translated = None
    return mapping


def main(translated_dir, lang, yml, mapping, href_filename):
    """
    postprocess all the translated files of one language
    translated_dir: root directory with all the translated files for the language
    lang: the translation language
    yml: if true the yml files are processed as well
    mapping: dict holding original file -> translated file
    href_filename: file that receives the translated permalinks/redirects which also occur in an href attribute
    """
    perms = get_all_translated_permalinks_and_redirects(translated_dir, lang)
    log_debug('all translated permalinks/redirects', perms)

    # use the parameter, not the command line namespace: the global args only exists
    # when this file is run as a script
    hrefs = get_all_the_hrefs(translated_dir)

    log_debug('all the hrefs', hrefs)
    write_to_file(href_filename, perms.intersection(hrefs))

    # for each pair of source and translated file postprocess the translated file
    for key, item in mapping.items():
        if yml and item.endswith('.yml'):
            process_yml(key, item, lang, perms)
        if not item.endswith('.yml'):
            process_markdown(key, item, perms, lang)
#!/bin/bash
# $1 is lang
# $2 is directory where translated files reside and language needs to be added to internal urls
# $3 is file with internal urls that belong to files already translated and downloaded from transifex to be replaced with lang/url
# where $3 is a file dumped by postprocess_translation.py
# this script exists because it is easier to correctly process html code with sed. python messes it up.
# example of invoking the script:
# bash _utils/_translation_utils/postprocess_translation.sh de _qubes-translated/de/ _utils/translated_hrefs_urls.txt
# exit status: 1 if a line of $3 does not start with /word, 0 otherwise

lang="$1"
translated_dir="$2"
hrefs_file="$3"

pattern="href=\"\/"
pattern_reset="href=\"\/${lang}\/"
escaped_slash="\/"

# run the given sed expression in place on every markdown and html file below the translated directory.
# file names are passed NUL separated so that blanks in a path do not split it,
# and sed is not started at all when no file is found
rewrite_all() {
    find "$translated_dir" \( -name '*.md' -o -name '*.html' \) -print0 | xargs -0 -r sed -i "$1"
}

# find the patterns that contain href=/$lang pattern and reset
rewrite_all "s/$pattern_reset/$pattern/g"

# the file written by postprocess_translation.py has no trailing newline:
# read returns non-zero for the last line although it did fill $line, so test $line as well
while read -r line || [ -n "$line" ]; do
    # check for traversing patterns in $3: check if every line begins with /word
    if [ -z "$(grep -oP '^(/(\w+))*' <<< "$line")" ]
    then
        echo "the string does not begin as it should" >&2
        exit 1
    fi
    #escape '/' with '\/'
    l="${line//\//$escaped_slash}"
    search_pattern="href=\"${l}\""
    replace_pattern="href=\"\/${lang}${l}\""

    # search and destroy
    rewrite_all "s/$search_pattern/$replace_pattern/g"
done < "$hrefs_file"
def read_counter(counterfile):
    """
    read the current reference counter
    counterfile: a file whose first line has the form 'current counter: x'
    return: x as an int
    the script is terminated with exit status 1 if the file does not exist
    or if its first line does not have the expected form
    """
    if not isfile(counterfile):
        print('check your files')
        # a missing counter file is a failure, do not report success to the calling shell script
        exit(1)
    with iopen(counterfile, 'r') as c:
        counter_line = c.readline()
    # value is the empty string if the prefix is missing, int() rejects that as well
    _, _, value = counter_line.partition('current counter: ')
    try:
        return int(value)
    except ValueError:
        print('check your files: expected a line of the form "current counter: x" in %s' % counterfile)
        exit(1)
def _drop_permalink_from_redirects(md):
    """remove the own permalink of a post from its redirect_from list, a redirect_from given as a plain string is left alone"""
    permalink = md.get(PERMALINK_KEY)
    redirects = md.get(REDIRECT_KEY)
    if permalink is None or redirects is None or isinstance(redirects, str):
        return
    if permalink in redirects:
        redirects.remove(permalink)


def _write_post(md, file_path):
    """write the post back to its file, followed by a final newline"""
    with iopen(file_path, 'wb') as replaced:
        dump(md, replaced)
        replaced.write(b'\n')


def main(root_dir, lang, counter):
    """
    prepare a single markdown file or all the markdown files below a directory for translation
    single file: the permalink is removed from redirect_from, lang and ref are added to the frontmatter if missing
    directory: only the permalink is removed from redirect_from, for every file that is not hidden,
               not in a hidden directory and does not have one of the FILENAME_EXTENSIONS
    files without frontmatter are not touched
    root_dir: a directory or a single file
    lang: the language in short form, written to the lang attribute
    counter: the next free reference number
    return: the next free reference number after processing
    """
    # if this is only a file
    if os.path.isfile(root_dir):
        # judge the file by its own name: with the whole path a file given
        # as './page.md' was taken for a hidden file and silently skipped
        if not check_file_name(os.path.basename(root_dir)):
            with iopen(root_dir) as fp:
                md = load(fp)
            if not md.metadata:
                return counter
            _drop_permalink_from_redirects(md)
            if md.get(LANG_KEY) is None:
                md[LANG_KEY] = lang
            if md.get(REF_KEY) is None:
                md[REF_KEY] = counter
                counter += 1
            _write_post(md, root_dir)

        return counter

    for dir_name, subdir_list, file_list in os.walk(root_dir):
        print('current directory: %s' % dir_name)
        print(os.path.basename(dir_name))

        if check_dir_name(dir_name):
            print('\t%s' % dir_name)
            print('1continue')
            continue

        for file_name in file_list:
            print('\t%s' % file_name)
            if check_file_name(file_name):
                print('continue')
                continue
            file_path = dir_name + "/" + file_name
            with iopen(file_path) as fp:
                md = load(fp)
            if not md.metadata:
                print('no metadata in %s' % file_path)
                continue
            _drop_permalink_from_redirects(md)
            # NOTE(review): lang and ref are not added in directory mode, this was
            # already switched off before this rewrite - confirm that it is intended
            _write_post(md, file_path)

    return counter
#!/bin/bash
# $1 is .tx/config file
# $2 the new mapping file to be used by postprocess_translation.py
# keeps only the file_filter and source_file lines of the tx configuration:
# empty lines, source_lang, lines starting with t or h and every [section] header are dropped.
# one sed process with one expression per rule, the file names are quoted so that blanks in a path are safe
sed -e '/^$/d' -e '/source_lang/d' -e '/^t/d' -e '/^h/d' -e '/\[main]/d' -e '/\[/d' "$1" > "$2"
#sed -i 's/aux\/news_strings.yml/news\/index.html/g' $2
#sed -i 's/aux\/news_categories_strings.yml/news\/categories\/index.html/g' $2
+pyrsistent==0.15.7 +python-frontmatter==0.5.0 +PyYAML==5.3 +six==1.14.0 +zipp==3.1.0 diff --git a/_utils/_translation_utils/tag_strings_as_locked.py b/_utils/_translation_utils/tag_strings_as_locked.py new file mode 100644 index 00000000..c23808da --- /dev/null +++ b/_utils/_translation_utils/tag_strings_as_locked.py @@ -0,0 +1,397 @@ +#!/usr/bin/python3 +''' +invoke: python tag_as_locked.py tx-resource-names.txt tx-sources-filenames.txt api-token --debug --manual +param1: tx-resources-names.txt: provide the file from tx configuration containing the resource names +param2: tx-sources-filenames.txt: provide the file from tx configuration containing only the original source filenames +param3: api-token: provide the developer api transifex token for auth +param4: debug: whether or not to write debug json files +param5: manual: whether or not to tag file by a file by waiting for a keyboard input +''' +from pycurl import Curl, HTTP_CODE, error, WRITEFUNCTION +from frontmatter import Post, load +from certifi import where +from io import BytesIO +from io import open as iopen +from os.path import isfile +from re import match +import sys +from sys import exit +from argparse import ArgumentParser +from json import loads, dumps +from jsonschema import validate +from jsonschema.exceptions import ValidationError +from collections import deque +from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler + +# TODO should we also mark notranslate as also reviewed ? it may need manual labor afterwards though ? 
+# Here should also go a comment that this snippet of code should be extended if the data files are to be altered and there are part that have to stay the same +# This can be done by fetching the strings from the tx api via curl: +# curl -i -L --user api:XXXXXXXXXXXXXXX -X GET https://www.transifex.com/api/2/project/qubes/resource/no_html_data_hcl/translation/en/strings/ +# and searching for the key pattern that should be marked as locked and thus untranslatabel and immutable + +KEY_REGEX_LOCK_PATTERNS = ['^\[(\d)*\](.sub-pages.)\[(\d)*\](.url)$', + '^\[(\d)*\](.sub-pages.)\[(\d)*\](.sub-pages.)\[(\d)*\](.url)$', + '^\[(\d)*\](.sub-pages.)\[(\d)*\](.icon)$', + '^(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.icon)$','^(\[(\d)*\])(.category)$', + '^(\[(\d)*\])(.tech.)(\[(\d)*\])(.img)$', '^(\[(\d)*\])(.tech.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.award.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.award.)(\[(\d)*\])(.img)$', + '^(\[(\d)*\])(.media.)(\[(\d)*\])(.img)$', '^(\[(\d)*\])(.media.)(\[(\d)*\])(.article)$', + '^(\[(\d)*\])(.attachment)$', '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.tweet)$', + '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.avatar)$', '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.img)$', + '^(\[(\d)*\])(.htmlsection)$', '^(\[(\d)*\])(.folder)$','redirect_from.\[(\d)*\]', + '^(\[(\d)*\])(.links.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.links.)(\[(\d)*\])(.id)$', + '^(\[(\d)*\])(.columns.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.htmlsections.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.id)$', '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.img)$', + '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.paragraphs.)(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.list.)(\[(\d)*\])(.item.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.releases.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', + 
'^(\[(\d)*\])(.note.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.htmlsections.)(\[(\d)*\])(.htmlsection)$', + '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.section.)(\[(\d)*\])(.url)$', + '^(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', + '^categories.(\[(\d)*\])(.slug)$', '^papers.(\[(\d)*\])(.title)$','^papers.(\[(\d)*\])(.author)$', '^papers.(\[(\d)*\])(.url)$', '^papers.(\[(\d)*\])(.category)$', + '^(\[(\d)*\])(.name)$', '^(\[(\d)*\])(.type)$', '^(\[(\d)*\])(.picture)$', '^(\[(\d)*\])(.email)$', '^(\[(\d)*\])(.fingerprint)$', '^(\[(\d)*\])(.github)$', '^(\[(\d)*\])(.website)$', + '^(\[(\d)*\])(.section.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.section)$', + '^(\[(\d)*\])(.releases.)(\[(\d)*\])(.r_version)$', '^(\[(\d)*\])(.section.)(\[(\d)*\])(.note)$'] + +KEY_REGEX_PATTERNS = ['^\[\d\](.sub-pages.)\[\d\](.url)$', '^\[\d\](.sub-pages.)\[\d\](.sub-pages.)\[\d\](.url)$', '^\[\d\](.sub-pages.)\[\d\](.icon)$', '^(\[\d\])(.url)$', '^(\[\d\])(.icon)$','^(\[\d\])(.category)$', '^(\[\d\])(.tech.)(\[\d\])(.img)$', '^(\[\d\])(.tech.)(\[\d\])(.url)$', '^(\[\d\])(.award.)(\[\d\])(.url)$', '^(\[\d\])(.award.)(\[\d\])(.img)$', '^(\[\d\])(.media.)(\[\d\])(.img)$', '^(\[\d\])(.media.)(\[\d\])(.article)$', '^(\[\d\])(.attachment)$', '^(\[\d\])(.expert.)(\[\d\])(.tweet)$', '^(\[\d\])(.expert.)(\[\d\])(.avatar)$', '^(\[\d\])(.expert.)(\[\d\])(.img)$', '^(\[\d\])(.htmlsection)$', '^(\[\d\])(.folder)$','redirect_from.\[\d\]', '^(\[\d\])(.links.)(\[\d\])(.url)$', '^(\[\d\])(.links.)(\[\d\])(.id)$'] + +KEY_PATTERNS = ['lang', 'layout', 'permalink', 'redirect_from'] +SOURCE_PATTERNS = ['* * * * *', ''] +# TODO use re2 per default? 
+# examples for the first regex: +# ![edit-button-mobile](/attachment/wiki/doc-edit/02-button1.png) +# ![commit](/attachment/wiki/doc-gel/07-commit-msg.png) +# for the second one a liquid include html line + +SOURCE_REGEX_PATTERNS = [ + '^\!\[(\w{0,50}(-){0,50}(\.){0,2}\w{0,50}){0,10}\]\((\/(\w{0,50}(-){0,8}\w{0,50})){0,10}(\w{0,50}(-){0,50}\w{0,50}){0,10}.\w{0,10}\)', + '{%[^\S\n]{1,8}include[\s\w-]*\.html[^\S\n]{1,8}%}' + ] +START_END_PATTERNS = {'{%': '%}', '{{': '}}', '{{':'>', '', '[':']', '', '', '', '
', '', } + +# tx resources keys +STRING_HASH_KEY = 'string_hash' +KEY_KEY = 'key' +SOURCE_STRING_KEY = 'source_string' +# markdown frontmatter keys +PERMALINK_KEY = 'permalink' +REDIRECT_KEY = 'redirect_from' +# comment and tag for tx source strings +UPDATE_TAGS = '{"comment": "Added notranslate tag via curl", "tags": ["notranslate"]}' + +UPDATE_TAGS_LOCKED = '{"comment": "Added locked tags via curl", "tags": ["locked"]}' +DATA_TO_URL_MAPPING = \ +{ + 'hcl': 'Strings are taken from this page: https://www.qubes-os.org/hcl/', + 'download': 'Strings are taken from this page: https://www.qubes-os.org/downloads/', + 'experts': 'Strings are taken from this page: https://www.qubes-os.org/experts/', + 'home': 'Strings are taken from this page: https://www.qubes-os.org/', + 'index': 'Strings are taken from this page: https://www.qubes-os.org/doc/', + 'intro': 'Strings are taken from this page: https://www.qubes-os.org/intro/', + 'partners': 'Strings are taken from this page: https://www.qubes-os.org/partners/', + 'style_guide_content': 'Strings are taken from this page: https://www.qubes-os.org/doc/style-guide/', + 'teamtexts': 'Strings are taken from this page: https://www.qubes-os.org/team/', + 'videos': 'Strings are taken from this page: https://www.qubes-os.org/video-tours/', + } + +# transifex schema +TX_JSON_SCHEMA = { + "type":"array", + "items": + [ + { + "type":"object", + "required": ["comment", "context", "key","string_hash","reviewed","pluralized","source_string","translation"], + "properties": + { + "comment": {"type" : "string"}, + "context": {"type" : "string"}, + "key": {"type" : "string"}, + "string_hash": {"type" : "string"}, + "reviewed": {"type" : "boolean"}, + "pluralized": {"type" : "boolean"}, + "source_string": {"type" : "string"}, + "translation": {"type" : "string"} + } + } + ] + } + +tagged_locked = dict() + +basicConfig(level=DEBUG) +logger = getLogger(__name__) +LOG_FILE_NAME='/tmp/tag_strings_as_locked.log' + +def configure_logging(logname): + 
def get_all_original_permalinks_and_redirects(sourcenamesfiles):
    '''
    get the permalinks and redirects from all the original to be translated files
    sourcenamesfiles: a file containing all the source original file names from the tx config,
                      one 'source_file = path' entry per line, lines without such an entry are ignored
    return: a set containing the permalinks and redirects of the original files
    the script is terminated with exit status 1 if a redirect_from is neither a list nor a string
    '''
    # every line that carries a source_file entry is taken. picking the lines by an odd
    # position skipped half of the files when the file holds nothing but source_file lines
    with iopen(sourcenamesfiles) as fp:
        sources = ['./' + x.split('source_file =')[1].strip() for x in fp if 'source_file =' in x]

    perms_and_redirects = set()
    for filepath in sources:
        logger.debug('reading %s' % filepath)
        with iopen(filepath) as fp:
            md = load(fp)

        permalink = md.get(PERMALINK_KEY)
        if permalink is not None:
            perms_and_redirects.add(permalink)
        else:
            logger.error('no permalink in frontmatter for file %s' % filepath)

        redirects = md.get(REDIRECT_KEY)
        if redirects is None:
            logger.debug('no redirect_from in frontmatter for file %s' % filepath)
        elif isinstance(redirects, list):
            perms_and_redirects.update(redirects)
        elif isinstance(redirects, str):
            perms_and_redirects.add(redirects)
        else:
            logger.error('ERROR: unexpected in redirect_from: %s' % filepath)
            exit(1)
    return perms_and_redirects
create_hash_and_tags_mapping(tx_resources, tx_api_token, debug, perms_and_redirects, manual): + ''' + for every given file uploaded to transifex, get its resource strings, check if they need to be marked as locked and + save their hashes for further processing if this is the case + tx_resources: a list containing all the uploaded transifex files + tx_api_token: transifex API token + debug: if true be verbose + manual: if true the upload will be done file by file under the supervision of the developer + return a dictionary containing the string hash and resource name + ''' + + hash_and_tags_mapping=dict() + for res in tx_resources: + url = 'https://www.transifex.com/api/2/project/qubes/resource/' + res + '/translation/en/strings/' + logger.info('will fetch url %s via curl' % url) + if manual: + manual_break() + + buf = BytesIO() + c = Curl() + c.setopt(c.URL, url) + c.setopt(c.WRITEDATA, buf) + c.setopt(c.CAINFO, where()) + c.setopt(c.USERPWD, 'api:' + tx_api_token) + c.setopt(c.FOLLOWLOCATION, True) + try: + c.perform() + except error as pe: + logger.error("Pycurl: ", exc_info=True) + c.close() + exit(1) + if c.getinfo(HTTP_CODE) == 404: + logger.error("Following resource was not found in transifex: %s." % res) + logger.error("Response: %s", buf.getvalue()) + c.close() + continue + if c.getinfo(HTTP_CODE) != 200: + logger.error("Following resource could not be fetched: %s." % res) + logger.error("Response: %s", buf.getvalue()) + c.close() + continue + c.close() + + body = loads(buf.getvalue()) + try: + validate(body, TX_JSON_SCHEMA) + except ValidationError as e: + logger.error("SEVERE! 
def tag_strings_as_locked(hash_and_tag, tx_api_token, debug):
    '''
    upload locked tags for given string hashes for given strings for given files
    hash_and_tag: dictionary containing {resource name + '.' + string hash: resource name}
    tx_api_token: transifex API token
    debug: if true be verbose
    a string that cannot be tagged is logged and skipped,
    the script is terminated with exit status 1 if curl itself fails
    '''

    for stringreshash, filename in hash_and_tag.items():
        # the key is '<resource>.<hash>': cut the known resource prefix off instead of
        # splitting at the first dot, which breaks for a resource name containing a dot
        stringhash = stringreshash[len(filename) + 1:]
        url = 'https://www.transifex.com/api/2/project/qubes/resource/' + filename + '/source/' + stringhash + '/'
        logger.info('put tag %s via curl' % url)

        # strings of the data files get a comment pointing to the page they are shown on.
        # fall back to the generic comment if the part after '_data_' is not a known data file
        payload = UPDATE_TAGS_LOCKED
        if '_data_' in filename:
            comment = DATA_TO_URL_MAPPING.get(filename.split('_data_')[-1])
            if comment is not None:
                payload = dumps({"comment": comment, "tags": ["locked"]})

        buf = BytesIO()
        c = Curl()
        try:
            c.setopt(c.URL, url)
            c.setopt(WRITEFUNCTION, buf.write)
            # same CA bundle as the request that fetches the strings
            c.setopt(c.CAINFO, where())
            c.setopt(c.CUSTOMREQUEST, 'PUT')
            c.setopt(c.POST, 1)
            c.setopt(c.USERPWD, 'api:' + tx_api_token)
            c.setopt(c.FOLLOWLOCATION, True)
            c.setopt(c.POSTFIELDS, payload)
            # Content-Length counts bytes, not characters
            c.setopt(c.HTTPHEADER, ['Content-Type: application/json',
                                    'Content-Length: ' + str(len(payload.encode('utf-8')))])
            try:
                c.perform()
            except error:
                logger.error("Pycurl: ", exc_info=True)
                exit(1)
            status = c.getinfo(HTTP_CODE)
        finally:
            # the handle is released on every path, including the exit above
            c.close()

        if status == 404:
            logger.error("Following string hash %s for file %s could not be tagged as 'locked'." % (stringhash, filename))
            logger.error("Response: %s", buf.getvalue())
            continue
        if status != 200:
            logger.error("Following string hash %s for file %s could not be tagged as 'locked'" % (stringhash, filename))
            logger.error("Response: %s", buf.getvalue())
            continue

        if debug:
            logger.debug('---------------------')
            logger.debug(buf.getvalue())
action='store_true') + # whether or not to tag file by a file + parser.add_argument("--manual", action='store_true') + + + args = parser.parse_args() + configure_logging(LOG_FILE_NAME) + + manual = args.manual + + if not isfile(args.tx_resourcenamesfile): + print("please check your transifex resource names file") + logger.error("please check your transifex resource names file") + sys.exit(1) + + if not isfile(args.tx_sourcesnamesfile): + print("please check your transifex original sourcenames file") + logger.error("please check your transifex original sourcenames file") + sys.exit(1) + + tx_resources = [] + with iopen(args.tx_resourcenamesfile) as f: + tx_resources = f.readlines() + tx_resources = [ t.rstrip() for t in tx_resources] + + + # testing on + if manual: + manual_break() + + perms_and_redirects = get_all_original_permalinks_and_redirects(args.tx_sourcesnamesfile) + hash_and_tags_mapping = create_hash_and_tags_mapping(tx_resources, args.tx_api_token, args.debug, perms_and_redirects, manual) + + if args.debug: + logger.debug("------------------------------------------------") + logger.debug("----------HASH 2 TAG NOTRANSLATE MAPPING--------") + logger.debug("------------------------------------------------") + logger.debug(dumps(hash_and_tags_mapping, indent=4)) + logger.debug("------------------------------------------------") + logger.debug("------------------------------------------------") + logger.debug("------------------------------------------------") + logger.debug("-------------STRINGS TAGGED NOTRANSLATE---------") + logger.debug("------------------------------------------------") + logger.debug("------------------------------------------------") + logger.debug(dumps(tagged_locked, indent=4)) + logger.debug("------------------------------------------------") + logger.debug("------------------------------------------------") + logger.debug("------------------------------------------------") + + tag_strings_as_locked(hash_and_tags_mapping, 
args.tx_api_token, args.debug) + + + + diff --git a/_utils/_translation_utils/tx_config.sh b/_utils/_translation_utils/tx_config.sh new file mode 100644 index 00000000..10bb0aba --- /dev/null +++ b/_utils/_translation_utils/tx_config.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# to be run from the git root +#to incoporate them back, no need + +# add ref & lang attributes to newly created files +python3 _utils/_translation_utils/prepare_for_translation.py en _doc/ _utils/_translation_utils/COUNTER.txt + +# because there is apparently a feature in tx config that doe snot update an existing configuration, every time everythin will be done from scratch: +# delete current tx configuration + +mv .tx/config /tmp/tx_config_old + +#init a tx configuration +tx init --skipsetup + + +# map the files with tx config +tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir _doc/en/_doc/ -i _dev --expression '_qubes-translated//_doc/{filepath}/{filename}{extension}' --execute +tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir pages --expression '_qubes-translated//pages/{filepath}/{filename}{extension}' --execute +tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir news --expression '_qubes-translated//news/{filepath}/{filename}{extension}' --execute + +#HTML +tx config mapping -r qubes._doc_en__doc_introduction_intro --source-lang en --type HTML --source-file _doc/en/_doc/introduction/intro.html '_qubes-translated//_doc/introduction/intro.html' --execute +tx config mapping -r qubes.pages_partners --source-lang en --type HTML --source-file pages/partners.html '_qubes-translated//pages/partners.html' --execute +tx config mapping -r qubes.pages_home --source-lang en --type HTML --source-file pages/home.html '_qubes-translated//pages/home.html' --execute + +tx config mapping -r qubes.data_architecture --source-lang en --type YAML_GENERIC 
--source-file _data/architecture.yml --expression '_qubes-translated//_data//architecture.yml' --execute +tx config mapping -r qubes.data_index --source-lang en --type YAML_GENERIC --source-file _data/index.yml --expression '_qubes-translated//_data//index.yml' --execute +tx config mapping -r qubes.data_includes --source-lang en --type YAML_GENERIC --source-file _data/includes.yml --expression '_qubes-translated//_data//includes.yml' --execute +tx config mapping -r qubes.data_teamtexts --source-lang en --type YAML_GENERIC --source-file _data/teamtexts.yml --expression '_qubes-translated//_data//teamtexts.yml' --execute +tx config mapping -r qubes.data_team --source-lang en --type YAML_GENERIC --source-file _data/team.yml --expression '_qubes-translated//_data//team.yml' --execute +tx config mapping -r qubes.data_experts --source-lang en --type YAML_GENERIC --source-file _data/experts.yml --expression '_qubes-translated//_data//experts.yml' --execute +tx config mapping -r qubes.data_download --source-lang en --type YAML_GENERIC --source-file _data/download.yml --expression '_qubes-translated//_data//download.yml' --execute +tx config mapping -r qubes.data_hcl --source-lang en --type YAML_GENERIC --source-file _data/hcl.yml --expression '_qubes-translated//_data//hcl.yml' --execute +tx config mapping -r qubes.data_research --source-lang en --type YAML_GENERIC --source-file _data/research.yml --expression '_qubes-translated//_data//research.yml' --execute +tx config mapping -r qubes.data_style_guide_content --source-lang en --type YAML_GENERIC --source-file _data/style_guide_content.yml --expression '_qubes-translated//_data//style_guide_content.yml' --execute + +crudini --del .tx/config qubes._doc_en__doc_README +crudini --del .tx/config qubes._doc_en__doc_CONTRIBUTING + +sed -i 's/_doc_en__doc/doc/g' .tx/config + + +echo "#####################################################################################" +echo "############# Please pay attention to the changes 
made to the current tx config #####" +echo "############# Do you have to delete some resources on transifex manually? ###########" +echo "############# left is the current, on the right is the old tx config ################" +echo "#####################################################################################" +diff .tx/config /tmp/tx_config_old --color diff --git a/_utils/_translation_utils/tx_pull.sh b/_utils/_translation_utils/tx_pull.sh new file mode 100644 index 00000000..e1d94deb --- /dev/null +++ b/_utils/_translation_utils/tx_pull.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# first argument is the language for which the translated files should be downloaded +# the mode developer will download all unreviewed translated strings as well +tx pull -l $1 --mode reviewed -d --traceback -r qubes.*doc* +tx pull -l $1 --mode reviewed -d --traceback -r qubes.*pages* +tx pull -l $1 --mode reviewed -d --traceback -r qubes.*news* + +# the different mode here is needed for YAML files, since the developer mode does not download source strings if untranslated +# the mode reviewed here will not work and will return empty strings for nontranslated ones +tx pull -l $1 --mode sourceastranslation -d --traceback -r qubes.*data_* From 040981952acf3a603a636dc7c90a6894d3d72f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sun, 28 Mar 2021 22:43:11 +0200 Subject: [PATCH 02/19] Add qubes-translated submodule Add it as "_translated" for consistency with other submodules. This is a change from the _qubes-translated name used in the scripts, so update the scripts (and a few html files) here too. Adjust the tx_config.sh script for a few renames. 
--- .gitmodules | 3 ++ _config.yml | 2 +- _includes/doc-widget.html | 4 +- _includes/header.html | 2 +- _includes/team.html | 2 +- _translated | 1 + .../postprocess_htmlproofer.py | 6 +-- .../postprocess_translation.py | 4 +- .../postprocess_translation.sh | 2 +- _utils/_translation_utils/tx_config.sh | 45 +++++++++---------- 10 files changed, 37 insertions(+), 34 deletions(-) create mode 160000 _translated diff --git a/.gitmodules b/.gitmodules index 7b7703fa..aa2d7ac7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "_hcl"] path = _hcl url = https://github.com/QubesOS/qubes-hcl +[submodule "_translated"] + path = _translated + url = https://github.com/QubesOS/qubes-translated diff --git a/_config.yml b/_config.yml index 9a724600..c299c866 100644 --- a/_config.yml +++ b/_config.yml @@ -30,7 +30,7 @@ collections: output: true hcl: output: false - qubes-translated: + translated: output: true diff --git a/_includes/doc-widget.html b/_includes/doc-widget.html index da2253f0..3bef12ae 100644 --- a/_includes/doc-widget.html +++ b/_includes/doc-widget.html @@ -6,8 +6,8 @@ {% assign master_edit = site.project_repo_path | append: "/qubes-doc/edit/master" | append: page.relative_path | remove_first: '_doc' %} {% assign master_blob = site.project_repo_path | append: "/qubes-doc/blob/master" | append: page.relative_path | remove_first: '_doc' %} {% else %} - {% assign master_edit = site.project_repo_path | append: "/qubes-translated/edit/master" | append: page.relative_path | remove_first: '_qubes-translated' %} - {% assign master_blob = site.project_repo_path | append: "/qubes-translated/blob/master" | append: page.relative_path| remove_first: '_qubes-translated' %} + {% assign master_edit = site.project_repo_path | append: "/qubes-translated/edit/master" | append: page.relative_path | remove_first: '_translated' %} + {% assign master_blob = site.project_repo_path | append: "/qubes-translated/blob/master" | append: page.relative_path| remove_first: 
'_translated' %} {% endif %} {% for item in docs.links %} diff --git a/_includes/header.html b/_includes/header.html index 86f98f45..91b75af3 100644 --- a/_includes/header.html +++ b/_includes/header.html @@ -17,7 +17,7 @@ {% assign langmenu = false %} {% endif %} {% if langmenu %} - {% assign posts = site.pages | concat: site.doc | concat: site.qubes-translated | where:'ref', page.ref | sort: 'lang' %} + {% assign posts = site.pages | concat: site.doc | concat: site.translated | where:'ref', page.ref | sort: 'lang' %}
diff --git a/_includes/team.html b/_includes/team.html index 06c3fef8..c3fea148 100644 --- a/_includes/team.html +++ b/_includes/team.html @@ -6,7 +6,7 @@ {% assign emeritus = team-page | where_exp: "item", "item.htmlsection == 'emeritus'" | first %} {% assign community = team-page | where_exp: "item", "item.htmlsection == 'community'" | first %} {% assign team_link = lang | append: "/team/" %} -{% assign teams = site.pages | concat: site.qubes-translated | where:'permalink', team_link %} +{% assign teams = site.pages | concat: site.translated | where:'permalink', team_link %} {% if teams.size == 0 %} {% assign team_link = "/team/" %} {% endif %} diff --git a/_translated b/_translated new file mode 160000 index 00000000..d3b9b03c --- /dev/null +++ b/_translated @@ -0,0 +1 @@ +Subproject commit d3b9b03c1e480a9a53ffd46309c711a4dc0b6f08 diff --git a/_utils/_translation_utils/postprocess_htmlproofer.py b/_utils/_translation_utils/postprocess_htmlproofer.py index b593b0bf..bfb9cb56 100644 --- a/_utils/_translation_utils/postprocess_htmlproofer.py +++ b/_utils/_translation_utils/postprocess_htmlproofer.py @@ -1,10 +1,10 @@ #!/usr/bin/python3 ''' python _utils/_translation_utils/postprocess_htmlproofer.py -invoke: python _utils/_translation_utils/postprocess_htmlproofer.py de /tmp/html.output _qubes-translated/de/ +invoke: python _utils/_translation_utils/postprocess_htmlproofer.py de /tmp/html.output _translated/de/ [de]: translation language [/tmp/html.output]: output from htmlproofer -[_qubes-translated/de/]: the directory with the downloaded translated files from transifex +[_translated/de/]: the directory with the downloaded translated files from transifex ''' from frontmatter import Post, load, dump import yaml @@ -242,7 +242,7 @@ def process_yml(translated, errorlinks): logger.debug('do nothing for file: %s. it is OK.' 
% e.filename) if __name__ == '__main__': - # python _utils/_translation_utils/postprocess_htmlproofer.py de /tmp/html.output _qubes-translated/de/ + # python _utils/_translation_utils/postprocess_htmlproofer.py de /tmp/html.output _translated/de/ parser = ArgumentParser() # for which language should we do this parser.add_argument("language") diff --git a/_utils/_translation_utils/postprocess_translation.py b/_utils/_translation_utils/postprocess_translation.py index 033329ca..30a8f47c 100644 --- a/_utils/_translation_utils/postprocess_translation.py +++ b/_utils/_translation_utils/postprocess_translation.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 # adds language pattern in permalink line and all found relative links in the current open file recursively from a given root dir -# evoke like: python _utils/postprocess_translation.py de _qubes-translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml +# evoke like: python _utils/postprocess_translation.py de _translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml #param1 is the language in short form #param2 is the root translated dir #param3 is current transifex mapping between original and translated files in the format: @@ -386,7 +386,7 @@ def main(translated_dir, lang, yml, mapping, href_filename): if __name__ == '__main__': - # python _utils/postprocess_translation.py de _qubes-translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml + # python _utils/postprocess_translation.py de _translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml parser = ArgumentParser() # for which language should we do this parser.add_argument("language") diff --git a/_utils/_translation_utils/postprocess_translation.sh b/_utils/_translation_utils/postprocess_translation.sh index 03a74dfb..f775084e 100644 --- a/_utils/_translation_utils/postprocess_translation.sh +++ b/_utils/_translation_utils/postprocess_translation.sh @@ -5,7 +5,7 @@ # where $3 is a file dumped by 
postprocess_translation.py # this script exists because is easier to correctly process html code with sed. python messes it up. # example of evoking the script: -# bash _utils/_translation_utils/test.sh de _qubes-translated/de/ _utils/translated_hrefs_urls.txt +# bash _utils/_translation_utils/test.sh de _translated/de/ _utils/translated_hrefs_urls.txt pattern="href=\"\/" diff --git a/_utils/_translation_utils/tx_config.sh b/_utils/_translation_utils/tx_config.sh index 10bb0aba..4eb937c6 100644 --- a/_utils/_translation_utils/tx_config.sh +++ b/_utils/_translation_utils/tx_config.sh @@ -15,31 +15,30 @@ tx init --skipsetup # map the files with tx config -tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir _doc/en/_doc/ -i _dev --expression '_qubes-translated//_doc/{filepath}/{filename}{extension}' --execute -tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir pages --expression '_qubes-translated//pages/{filepath}/{filename}{extension}' --execute -tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir news --expression '_qubes-translated//news/{filepath}/{filename}{extension}' --execute +tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir _doc -i _dev --expression '_translated//_doc/{filepath}/{filename}{extension}' --execute +tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir pages --expression '_translated//pages/{filepath}/{filename}{extension}' --execute +tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir news --expression '_translated//news/{filepath}/{filename}{extension}' --execute #HTML -tx config mapping -r qubes._doc_en__doc_introduction_intro --source-lang en --type HTML --source-file _doc/en/_doc/introduction/intro.html 
'_qubes-translated//_doc/introduction/intro.html' --execute -tx config mapping -r qubes.pages_partners --source-lang en --type HTML --source-file pages/partners.html '_qubes-translated//pages/partners.html' --execute -tx config mapping -r qubes.pages_home --source-lang en --type HTML --source-file pages/home.html '_qubes-translated//pages/home.html' --execute - -tx config mapping -r qubes.data_architecture --source-lang en --type YAML_GENERIC --source-file _data/architecture.yml --expression '_qubes-translated//_data//architecture.yml' --execute -tx config mapping -r qubes.data_index --source-lang en --type YAML_GENERIC --source-file _data/index.yml --expression '_qubes-translated//_data//index.yml' --execute -tx config mapping -r qubes.data_includes --source-lang en --type YAML_GENERIC --source-file _data/includes.yml --expression '_qubes-translated//_data//includes.yml' --execute -tx config mapping -r qubes.data_teamtexts --source-lang en --type YAML_GENERIC --source-file _data/teamtexts.yml --expression '_qubes-translated//_data//teamtexts.yml' --execute -tx config mapping -r qubes.data_team --source-lang en --type YAML_GENERIC --source-file _data/team.yml --expression '_qubes-translated//_data//team.yml' --execute -tx config mapping -r qubes.data_experts --source-lang en --type YAML_GENERIC --source-file _data/experts.yml --expression '_qubes-translated//_data//experts.yml' --execute -tx config mapping -r qubes.data_download --source-lang en --type YAML_GENERIC --source-file _data/download.yml --expression '_qubes-translated//_data//download.yml' --execute -tx config mapping -r qubes.data_hcl --source-lang en --type YAML_GENERIC --source-file _data/hcl.yml --expression '_qubes-translated//_data//hcl.yml' --execute -tx config mapping -r qubes.data_research --source-lang en --type YAML_GENERIC --source-file _data/research.yml --expression '_qubes-translated//_data//research.yml' --execute -tx config mapping -r qubes.data_style_guide_content --source-lang en 
--type YAML_GENERIC --source-file _data/style_guide_content.yml --expression '_qubes-translated//_data//style_guide_content.yml' --execute - -crudini --del .tx/config qubes._doc_en__doc_README -crudini --del .tx/config qubes._doc_en__doc_CONTRIBUTING - -sed -i 's/_doc_en__doc/doc/g' .tx/config - +tx config mapping -r qubes._doc_introduction_intro --source-lang en --type HTML --source-file _doc/introduction/intro.md '_translated//_doc/introduction/intro.md' --execute +tx config mapping -r qubes.pages_partners --source-lang en --type HTML --source-file pages/partners.html '_translated//pages/partners.html' --execute +tx config mapping -r qubes.pages_home --source-lang en --type HTML --source-file pages/home.html '_translated//pages/home.html' --execute + +tx config mapping -r qubes.data_architecture --source-lang en --type YAML_GENERIC --source-file _data/architecture.yml --expression '_translated//_data//architecture.yml' --execute +tx config mapping -r qubes.data_doc_index --source-lang en --type YAML_GENERIC --source-file _data/doc-index.yml --expression '_translated//_data//doc-index.yml' --execute +tx config mapping -r qubes.data_includes --source-lang en --type YAML_GENERIC --source-file _data/includes.yml --expression '_translated//_data//includes.yml' --execute +tx config mapping -r qubes.data_team-page --source-lang en --type YAML_GENERIC --source-file _data/team-page.yml --expression '_translated//_data//team-page.yml' --execute +tx config mapping -r qubes.data_team --source-lang en --type YAML_GENERIC --source-file _data/team.yml --expression '_translated//_data//team.yml' --execute +tx config mapping -r qubes.data_experts --source-lang en --type YAML_GENERIC --source-file _data/experts.yml --expression '_translated//_data//experts.yml' --execute +tx config mapping -r qubes.data_downloads_page --source-lang en --type YAML_GENERIC --source-file _data/downloads-page.yml --expression '_translated//_data//downloads-page.yml' --execute +tx config mapping -r 
qubes.data_hcl --source-lang en --type YAML_GENERIC --source-file _data/hcl.yml --expression '_translated//_data//hcl.yml' --execute +tx config mapping -r qubes.data_research --source-lang en --type YAML_GENERIC --source-file _data/research.yml --expression '_translated//_data//research.yml' --execute +tx config mapping -r qubes.data_style_guide_page --source-lang en --type YAML_GENERIC --source-file _data/style-guide-page.yml --expression '_translated//_data//style-guide-page.yml' --execute + +crudini --del .tx/config qubes._doc_README +crudini --del .tx/config qubes._doc_CONTRIBUTING + +sed -i 's/\._doc_/.doc_/' .tx/config echo "#####################################################################################" echo "############# Please pay attention to the changes made to the current tx config #####" From b3e884207601fff1eb69f4cce8bb6af668175896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Mar 2021 13:21:58 +0200 Subject: [PATCH 03/19] Various translation scripts improvements - use `set -e` in top level script, quote variables - prefer env variable for API token (not visible in the process list, and easier to provide in CI config); same for TRANSLATED_LANGS setting - fix setting tags - there needs to be an empty line before the content, otherwise the header is not handled correctly; this change makes merge_md_heading_ids.rb not idempotent, because line numbers change now, but it should be fine to call it one time directly after tx pull - if htmlproofer says it's ok, don't try to recover from (non-existent) failure - fix setting ref/lang (erroneously commented out before) - handle redirect_to frontmatter key (if internal - prefix with lang too) - better synchronize translated frontmatter to the source one - if source lacked redirect_from, remove one from translated too - use `tx pull --force` - git checkout time isn't necessarily the time of the last `tx pull`, so comparing mtime isn't really helpful to decide which file 
is up to date - don't prepend language prefix when redirecting to news article - those are not translated - sort redirect_from frontmatter entry, to avoid no-op commits --- .../merge_md_heading_ids.rb | 2 +- .../_translation_utils/post_transifex_pull.sh | 32 +++++++++---------- .../postprocess_htmlproofer.py | 6 ++-- .../postprocess_translation.py | 24 ++++++++++++-- .../prepare_for_translation.py | 12 +++---- .../tag_strings_as_locked.py | 10 +++--- _utils/_translation_utils/tx_config.sh | 7 ++-- _utils/_translation_utils/tx_pull.sh | 10 +++--- 8 files changed, 64 insertions(+), 39 deletions(-) diff --git a/_utils/_translation_utils/merge_md_heading_ids.rb b/_utils/_translation_utils/merge_md_heading_ids.rb index 5f446a29..c85f7933 100644 --- a/_utils/_translation_utils/merge_md_heading_ids.rb +++ b/_utils/_translation_utils/merge_md_heading_ids.rb @@ -176,7 +176,7 @@ def insert_ids_to_gfm_file(line_to_id_map, gfm_lines) result = gfm_lines[0..-1] n = result.length line_to_id_map.each do |key, value| - str_to_insert = '' + str_to_insert = '' + "\n" line = result[key] if !line.nil? 
and line.start_with?('#') if key + 1 >= n diff --git a/_utils/_translation_utils/post_transifex_pull.sh b/_utils/_translation_utils/post_transifex_pull.sh index d5b15759..23676533 100644 --- a/_utils/_translation_utils/post_transifex_pull.sh +++ b/_utils/_translation_utils/post_transifex_pull.sh @@ -4,6 +4,8 @@ # $2 is directory where translated files reside and language needs to be added to internal urls # TODO param check +set -e + echo "============================ post processing step 1 ======================================" #read b bash _utils/_translation_utils/prepare_tx_config_postprocess.sh .tx/config /tmp/tx-mapping @@ -11,38 +13,36 @@ bash _utils/_translation_utils/prepare_tx_config_postprocess.sh .tx/config /tmp/ echo "============================ post processing step 2 ======================================" #read b -ruby _utils/_translation_utils/merge_md_heading_ids.rb $1 /tmp/tx-mapping +ruby _utils/_translation_utils/merge_md_heading_ids.rb "$1" /tmp/tx-mapping echo "============================ post processing step 3 press to cont ======================================" #read b -python3 _utils/_translation_utils/postprocess_translation.py $1 $2 /tmp/tx-mapping /tmp/translated_href_urls.txt --yml +python3 _utils/_translation_utils/postprocess_translation.py "$1" "$2" /tmp/tx-mapping /tmp/translated_href_urls.txt --yml echo "============================ post processing step 4 press to cont ======================================" #read b -bash _utils/_translation_utils/postprocess_translation.sh $1 $2 /tmp/translated_href_urls.txt +bash _utils/_translation_utils/postprocess_translation.sh "$1" "$2" /tmp/translated_href_urls.txt -echo "================================= build suite ==================================" +echo "================================= build site ==================================" #read b bundle exec jekyll b +all_ok=true echo "================================= run htmlproofer ===============================" -htmlproofer 
./_site --disable-external --checks-to-ignore ImageCheck --file-ignore ./_site/video-tours/index.html,./_site/$1/video-tours/index.html --url-ignore "/qubes-issues/" --log-level debug 2&> /tmp/html.output - -echo "================================== as a last resort in case of errors process html proofer errors =================================" -python3 _utils/_translation_utils/postprocess_htmlproofer.py $1 /tmp/html.output $2 - -echo "================================= build the site and run htmlproofer ====================================" -rm -rf ./_site/ -bundle exec jekyll b -htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore ./_site/video-tours/index.html,./_site/$1/video-tours/index.html --url-ignore "/qubes-issues/" --log-level debug || all_ok=false +htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/$1/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug 2&> /tmp/html.output || all_ok=false +# exit here if all is ok if $all_ok; then echo 'All checks passed!' -else - echo 'Some checked failed. See above.' 
- exit 1 + exit fi +echo "================================== as a last resort in case of errors process html proofer errors =================================" +python3 _utils/_translation_utils/postprocess_htmlproofer.py "$1" /tmp/html.output "$2" +echo "================================= build the site and run htmlproofer ====================================" +rm -rf ./_site/ +bundle exec jekyll b +htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/$1/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug diff --git a/_utils/_translation_utils/postprocess_htmlproofer.py b/_utils/_translation_utils/postprocess_htmlproofer.py index bfb9cb56..eb6a587b 100644 --- a/_utils/_translation_utils/postprocess_htmlproofer.py +++ b/_utils/_translation_utils/postprocess_htmlproofer.py @@ -12,7 +12,7 @@ from re import search from sys import exit import sys -from os import linesep, walk +from os import linesep, walk, environ from argparse import ArgumentParser from os.path import isfile, isdir from json import loads, dumps @@ -24,6 +24,8 @@ PERMALINK_KEY = 'permalink' REDIRECT_KEY = 'redirect_from' TRANSLATED_LANGS = ['de'] +if 'TRANSLATED_LANGS' in environ: + TRANSLATED_LANGS = environ['TRANSLATED_LANGS'].split() URL_KEY = 'url' @@ -96,7 +98,7 @@ def process_markdown(translated_file, internal_links): lines.append(to_replace) continue - if "[" and "](" in line and ")" in line: + if "[" in line and "](" in line and ")" in line: count = line.count('](') tmp = line val = 0 diff --git a/_utils/_translation_utils/postprocess_translation.py b/_utils/_translation_utils/postprocess_translation.py index 30a8f47c..db12a6ba 100644 --- a/_utils/_translation_utils/postprocess_translation.py +++ b/_utils/_translation_utils/postprocess_translation.py @@ -15,7 +15,7 @@ import frontmatter from io import open as iopen from os.path import isfile, isdir -from os import linesep, walk +from os import linesep, walk, 
environ from re import findall from sys import exit from argparse import ArgumentParser @@ -47,12 +47,15 @@ # md frontmatterkeys: PERMALINK_KEY = 'permalink' REDIRECT_KEY = 'redirect_from' +REDIRECT_TO = 'redirect_to' LANG_KEY = 'lang' TRANSLATED_KEY = 'translated' LAYOUT_KEY = 'layout' SLASH = '/' MD_URL_SPLIT_PATTERNS = ['/)','/#'] TRANSLATED_LANGS = ['de'] +if 'TRANSLATED_LANGS' in environ: + TRANSLATED_LANGS = environ['TRANSLATED_LANGS'].split() #EXCLUDE_FILES = ['download.md' ] @@ -107,6 +110,9 @@ def process_markdown(source_file, translated_file, permalinks, lang): mdt = frontmatter.load(t) if mds.get(PERMALINK_KEY) != None: mdt[PERMALINK_KEY] = SLASH + lang + mds.get(PERMALINK_KEY) + elif PERMALINK_KEY in mdt: + # if missing in source, remove from translated too + del mdt[PERMALINK_KEY] if mds.get(REDIRECT_KEY) != None: redirects = mds.get(REDIRECT_KEY) @@ -124,12 +130,26 @@ def process_markdown(source_file, translated_file, permalinks, lang): if mdt.get(PERMALINK_KEY) != None and mdt[PERMALINK_KEY] in mdt[REDIRECT_KEY]: mdt[REDIRECT_KEY].remove(mdt[PERMALINK_KEY]) - tmp = list(set(mdt[REDIRECT_KEY])) + tmp = sorted(set(mdt[REDIRECT_KEY])) mdt[REDIRECT_KEY] = tmp + elif REDIRECT_KEY in mdt: + # if missing in source, remove from translated too + del mdt[REDIRECT_KEY] if mds.get(LAYOUT_KEY) != None: mdt[LAYOUT_KEY] = mds[LAYOUT_KEY] + if mds.get(REDIRECT_TO) != None: + redirect = mds.get(REDIRECT_TO) + if isinstance(redirect, list): + redirect = redirect[0] + if redirect.startswith('/') and not redirect.startswith(SLASH + lang + SLASH) and not redirect.startswith(news): + mdt[REDIRECT_TO] = SLASH + lang + redirect + else: + mdt[REDIRECT_TO] = redirect + elif REDIRECT_TO in mdt: + del mdt[REDIRECT_TO] + mdt[LANG_KEY] = lang # TODO we do not need the translated key anymore #mdt[TRANSLATED_KEY] = 'yes' diff --git a/_utils/_translation_utils/prepare_for_translation.py b/_utils/_translation_utils/prepare_for_translation.py index 32647a8f..d35b9ac6 100644 --- 
a/_utils/_translation_utils/prepare_for_translation.py +++ b/_utils/_translation_utils/prepare_for_translation.py @@ -61,7 +61,7 @@ def main(root_dir, lang, counter): if not isinstance(redirects, str): md[REDIRECT_KEY].remove(md[PERMALINK_KEY]) if md.get(LANG_KEY) == None: - md[LANG_KEY] = "en" + md[LANG_KEY] = lang if md.get(REF_KEY) == None: md[REF_KEY] = counter counter += 1 @@ -97,11 +97,11 @@ def main(root_dir, lang, counter): redirects = md.get(REDIRECT_KEY) if not isinstance(redirects, str): md[REDIRECT_KEY].remove(md[PERMALINK_KEY]) - #if md.get(LANG_KEY) == None: - # md[LANG_KEY] = "en" - #if md.get(REF_KEY) == None: - # md[REF_KEY] = counter - # counter += 1 + if md.get(LANG_KEY) == None: + md[LANG_KEY] = "en" + if md.get(REF_KEY) == None: + md[REF_KEY] = counter + counter += 1 with iopen(file_path, 'wb') as replaced: dump(md, replaced) diff --git a/_utils/_translation_utils/tag_strings_as_locked.py b/_utils/_translation_utils/tag_strings_as_locked.py index c23808da..b4830408 100644 --- a/_utils/_translation_utils/tag_strings_as_locked.py +++ b/_utils/_translation_utils/tag_strings_as_locked.py @@ -12,6 +12,7 @@ from certifi import where from io import BytesIO from io import open as iopen +from os import environ from os.path import isfile from re import match import sys @@ -338,8 +339,6 @@ def tag_strings_as_locked(hash_and_tag, tx_api_token, debug): parser.add_argument("tx_resourcenamesfile") # provide the file from tx configuration containing only the original source filenames parser.add_argument("tx_sourcesnamesfile") - # provide the developer api transifex token for auth - parser.add_argument("tx_api_token") # whether or not to write debug json files parser.add_argument("--debug", action='store_true') # whether or not to tag file by a file @@ -371,8 +370,11 @@ def tag_strings_as_locked(hash_and_tag, tx_api_token, debug): if manual: manual_break() + if 'TX_TOKEN' not in environ: + parser.error('TX_TOKEN variable not set') + tx_api_token = 
environ['TX_TOKEN'] perms_and_redirects = get_all_original_permalinks_and_redirects(args.tx_sourcesnamesfile) - hash_and_tags_mapping = create_hash_and_tags_mapping(tx_resources, args.tx_api_token, args.debug, perms_and_redirects, manual) + hash_and_tags_mapping = create_hash_and_tags_mapping(tx_resources, tx_api_token, args.debug, perms_and_redirects, manual) if args.debug: logger.debug("------------------------------------------------") @@ -390,7 +392,7 @@ def tag_strings_as_locked(hash_and_tag, tx_api_token, debug): logger.debug("------------------------------------------------") logger.debug("------------------------------------------------") - tag_strings_as_locked(hash_and_tags_mapping, args.tx_api_token, args.debug) + tag_strings_as_locked(hash_and_tags_mapping, tx_api_token, args.debug) diff --git a/_utils/_translation_utils/tx_config.sh b/_utils/_translation_utils/tx_config.sh index 4eb937c6..b028716b 100644 --- a/_utils/_translation_utils/tx_config.sh +++ b/_utils/_translation_utils/tx_config.sh @@ -2,18 +2,19 @@ # to be run from the git root #to incoporate them back, no need +set -e + # add ref & lang attributes to newly created files python3 _utils/_translation_utils/prepare_for_translation.py en _doc/ _utils/_translation_utils/COUNTER.txt # because there is apparently a feature in tx config that doe snot update an existing configuration, every time everythin will be done from scratch: # delete current tx configuration -mv .tx/config /tmp/tx_config_old +mv .tx/config /tmp/tx_config_old || : #init a tx configuration tx init --skipsetup - # map the files with tx config tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir _doc -i _dev --expression '_translated//_doc/{filepath}/{filename}{extension}' --execute tx config mapping-bulk -p qubes --source-language en --type GITHUBMARKDOWN -f '.md' -d --source-file-dir pages --expression '_translated//pages/{filepath}/{filename}{extension}' --execute @@ -45,4 
+46,4 @@ echo "############# Please pay attention to the changes made to the current tx c echo "############# Do you have to delete some resources on transifex manually? ###########" echo "############# left is the current, on the right is the old tx config ################" echo "#####################################################################################" -diff .tx/config /tmp/tx_config_old --color +diff /tmp/tx_config_old .tx/config --color || : diff --git a/_utils/_translation_utils/tx_pull.sh b/_utils/_translation_utils/tx_pull.sh index e1d94deb..cc616803 100644 --- a/_utils/_translation_utils/tx_pull.sh +++ b/_utils/_translation_utils/tx_pull.sh @@ -1,10 +1,10 @@ -#!/bin/bash +#!/bin/bash -e # first argument is the language for which the translated files should be downloaded # the mode developer will download all unreviewed translated strings as well -tx pull -l $1 --mode reviewed -d --traceback -r qubes.*doc* -tx pull -l $1 --mode reviewed -d --traceback -r qubes.*pages* -tx pull -l $1 --mode reviewed -d --traceback -r qubes.*news* +tx pull --force -l $1 --mode reviewed -d --traceback -r qubes.doc* +tx pull --force -l $1 --mode reviewed -d --traceback -r qubes.pages* +tx pull --force -l $1 --mode reviewed -d --traceback -r qubes.news* # the different mode here is needed for YAML files, since the developer mode does not download source strings if untranslated # the mode reviewed here will not work and will return empty strings for nontranslated ones -tx pull -l $1 --mode sourceastranslation -d --traceback -r qubes.*data_* +tx pull --force -l $1 --mode sourceastranslation -d --traceback -r qubes.data_* From 0b496ff48ceed60bf1538e33ec756d126f326fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Mar 2021 13:30:44 +0200 Subject: [PATCH 04/19] Introduce an option to filter what languages are visible in the switcher Also, if there is just one, hide the language switcher --- _config.yml | 5 +++++ 
_includes/header.html | 12 +++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/_config.yml b/_config.yml index c299c866..0de5e68a 100644 --- a/_config.yml +++ b/_config.yml @@ -21,6 +21,11 @@ relative_permalinks: false permalink: /news/:year/:month/:day/:title/ excerpt_separator: "" +# enabled languages +# remember to create symlinks in _data/translation too +languages: +- en + plugins: - jekyll-redirect-from - jekyll-sitemap diff --git a/_includes/header.html b/_includes/header.html index 91b75af3..a19c4918 100644 --- a/_includes/header.html +++ b/_includes/header.html @@ -10,8 +10,12 @@ - - {% assign langmenu = false %} + + {% if site.languages.size > 1 %} + {% assign langmenu = true %} + {% else %} + {% assign langmenu = false %} + {% endif %} {% if page.layout == nil or page.collection == 'posts' %} {% assign langmenu = false %} @@ -23,7 +27,9 @@
{% for post in posts %} - {{ post.lang }} + {% if site.languages contains post.lang %} + {{ post.lang }} + {% endif %} {% endfor %}
From d6737e41977128412e2f1703435bdc20bfce6f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Mar 2021 13:31:44 +0200 Subject: [PATCH 05/19] Update translated files counter --- _utils/_translation_utils/COUNTER.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_utils/_translation_utils/COUNTER.txt b/_utils/_translation_utils/COUNTER.txt index 8525d4ab..f37b177f 100644 --- a/_utils/_translation_utils/COUNTER.txt +++ b/_utils/_translation_utils/COUNTER.txt @@ -1 +1 @@ -current counter: 242 \ No newline at end of file +current counter: 251 From 28dcb43922b4b01280333197f815c01ad2daf19b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Mar 2021 13:49:47 +0200 Subject: [PATCH 06/19] Add initial symlinks in _data/translation For now symlink es, de, fr. We can add more when we'll have translations. --- _data/translation/de | 1 + _data/translation/es | 1 + _data/translation/fr | 1 + 3 files changed, 3 insertions(+) create mode 120000 _data/translation/de create mode 120000 _data/translation/es create mode 120000 _data/translation/fr diff --git a/_data/translation/de b/_data/translation/de new file mode 120000 index 00000000..a25f53a6 --- /dev/null +++ b/_data/translation/de @@ -0,0 +1 @@ +../../_translated/de/_data/de \ No newline at end of file diff --git a/_data/translation/es b/_data/translation/es new file mode 120000 index 00000000..65518b27 --- /dev/null +++ b/_data/translation/es @@ -0,0 +1 @@ +../../_translated/es/_data/es \ No newline at end of file diff --git a/_data/translation/fr b/_data/translation/fr new file mode 120000 index 00000000..68f25cda --- /dev/null +++ b/_data/translation/fr @@ -0,0 +1 @@ +../../_translated/fr/_data/fr \ No newline at end of file From b49ec530923f7e5fd97b4e643164d61098195ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Mar 2021 15:45:22 +0200 Subject: [PATCH 07/19] Reduce verbosity - 
CI log is too long --- _utils/_translation_utils/tx_pull.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/_utils/_translation_utils/tx_pull.sh b/_utils/_translation_utils/tx_pull.sh index cc616803..82061583 100644 --- a/_utils/_translation_utils/tx_pull.sh +++ b/_utils/_translation_utils/tx_pull.sh @@ -1,10 +1,10 @@ #!/bin/bash -e # first argument is the language for which the translated files should be downloaded # the mode developer will download all unreviewed translated strings as well -tx pull --force -l $1 --mode reviewed -d --traceback -r qubes.doc* -tx pull --force -l $1 --mode reviewed -d --traceback -r qubes.pages* -tx pull --force -l $1 --mode reviewed -d --traceback -r qubes.news* +tx pull --force -l $1 --mode reviewed --traceback -r qubes.doc* +tx pull --force -l $1 --mode reviewed --traceback -r qubes.pages* +tx pull --force -l $1 --mode reviewed --traceback -r qubes.news* # the different mode here is needed for YAML files, since the developer mode does not download source strings if untranslated # the mode reviewed here will not work and will return empty strings for nontranslated ones -tx pull --force -l $1 --mode sourceastranslation -d --traceback -r qubes.data_* +tx pull --force -l $1 --mode sourceastranslation --traceback -r qubes.data_* From e232600b97b74dbcabbd719bdd6b60299ad1f7d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Mar 2021 14:30:48 +0200 Subject: [PATCH 08/19] Add ci job for pushing/pulling content to Transifex Add two wrapper scripts in _utils for that. 
--- .gitlab-ci.yml | 41 +++++++++++++++++++++++++++++++++++++++++ _utils/transifex-pull | 36 ++++++++++++++++++++++++++++++++++++ _utils/transifex-push | 27 +++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100755 _utils/transifex-pull create mode 100755 _utils/transifex-push diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1167191a..564dfd7d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,5 +2,46 @@ include: - project: 'QubesOS/qubes-continuous-integration' file: '/gitlab-website.yml' +stages: + - prepare + - build + build:website: extends: .website + +update-transifex: + tags: + - docker + stage: prepare + rules: + - if: '$TX_TOKEN && $GITHUB_KEY' + when: always + - when: never + artifacts: + expire_in: 7 days + when: always + paths: + - site.tar.gz + variables: + GIT_SUBMODULE_STRATEGY: normal + GIT_AUTHOR_NAME: translation bot + GIT_AUTHOR_EMAIL: builder-bot@qubes-os.org + GIT_COMMITTER_NAME: translation bot + GIT_COMMITTER_EMAIL: builder-bot@qubes-os.org + PAGES_REPO_NWO: QubesOS/qubesos.github.io + TRANSLATED_LANGS: de fr es + LANG: C.UTF-8 + before_script: + - mkdir -p $HOME/.ssh && echo "$GITHUB_KEY" > $HOME/.ssh/id_ed25519 && chmod 700 $HOME/.ssh/id_ed25519 + - echo "github.com,140.82.121.4 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==" >> $HOME/.ssh/known_hosts + - export PATH=$PATH:$HOME/bin + - sudo dnf install -y python3-pycurl python3-PyYAML python3-jsonschema python3-certifi python3-attrs /usr/bin/bundle rubygem-jekyll rubygem-nokogiri rubygem-concurrent-ruby ruby-devel gcc-c++ transifex-client crudini python3-pycurl python3-pyrsistent + - pip install python-frontmatter + - export 
NOKOGIRI_USE_SYSTEM_LIBRARIES=true + - gem install github-pages json html-proofer + - git submodule update --init + script: + - _utils/transifex-push + - _utils/transifex-pull $TRANSLATED_LANGS + after_script: + - tar czf site.tar.gz _site diff --git a/_utils/transifex-pull b/_utils/transifex-pull new file mode 100755 index 00000000..6c149cb1 --- /dev/null +++ b/_utils/transifex-pull @@ -0,0 +1,36 @@ +#!/bin/sh + +# Pull translated pages from Transifex + +if ! command -v tx; then + echo "transifex-client needs to be installed" >&2 + exit 1 +fi + +if [ -z "$TX_TOKEN" ]; then + echo "Please set TX_TOKEN variable to Transifex API key" >&2 + exit 1 +fi + +if [ -z "$1" ]; then + echo "Usage: $0 [ ...]" +fi + +set -e + +for lang in "$@"; do + bash _utils/_translation_utils/tx_pull.sh "$lang" + bash _utils/_translation_utils/post_transifex_pull.sh "$lang" _translated/"$lang" +done + +# switch to ssh for push +git -C _translated remote set-url origin git@github.com:QubesOS/qubes-translated + +# commit and push +git -C _translated add . + +# if nothing to commit, exit early +[ -n "$(git -C _translated status --porcelain)" ] || exit 0 + +git -C _translated commit -m 'Update translated content' +git -C _translated push origin HEAD:master diff --git a/_utils/transifex-push b/_utils/transifex-push new file mode 100755 index 00000000..7d17980d --- /dev/null +++ b/_utils/transifex-push @@ -0,0 +1,27 @@ +#!/bin/sh + +# This script uploads the website source files to Transifex for translation, including all the preparatory work + +if ! 
command -v tx; then + echo "transifex-client needs to be installed" >&2 + exit 1 +fi + +if [ -z "$TX_TOKEN" ]; then + echo "Please set TX_TOKEN variable to Transifex API key" >&2 + exit 1 +fi + +set -e + +# update config +bash _utils/_translation_utils/tx_config.sh + +# push the sources +tx push -s + +# update sources metadata (notranslate tags, locks etc) +bash _utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh .tx/config /tmp/tx-resources-names.txt /tmp/tx-sources-filenames.txt +python3 _utils/_translation_utils/tag_strings_as_locked.py /tmp/tx-resources-names.txt /tmp/tx-sources-filenames.txt --debug + + From a87015786c487f44d61f86e456e4854b2b6c787e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 31 Mar 2021 19:41:17 +0200 Subject: [PATCH 09/19] Add scripts to fetch+update qubes-translated submodule Fetch the update made by _utils/transifex-* scripts. The transifex-pull script performs quite detailed inspections and apply fixups. At this stage apply only a basic sanity check with the sole purpose: do not allow translated content to subvert origin english one. --- _utils/update-translated | 24 +++++ _utils/verify-translated | 201 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100755 _utils/update-translated create mode 100755 _utils/verify-translated diff --git a/_utils/update-translated b/_utils/update-translated new file mode 100755 index 00000000..9164359c --- /dev/null +++ b/_utils/update-translated @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e + +tmpbranch="new-$$" +git -C _translated fetch origin master:"$tmpbranch" +tmpdir=$(mktemp -d) +trap 'rm -rf $tmpdir; git -C _translated br -D $tmpbranch' EXIT +git clone --shared "$PWD/_translated" -b "$tmpbranch" "$tmpdir/translated" + +if ! _utils/verify-translated "$tmpdir/translated"; then + echo "Translated content did not pass sanity check, not updating" >&2 + # TODO: consider some louder alert? email? issue on github? 
+ exit 1 +fi + +git -C _translated merge --ff-only "$tmpbranch" +git add _translated + +git commit -m 'autoupdate: _translated' +commit_id=$(git show --pretty=format:%H|head -1) +tag_name=auto_${commit_id:0:8} +git tag -s -m "Automatic tag for commit $commit_id" "$tag_name" +git push origin master $tag_name diff --git a/_utils/verify-translated b/_utils/verify-translated new file mode 100755 index 00000000..0bfbc0a2 --- /dev/null +++ b/_utils/verify-translated @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 + +# Simple verifier for qubes-translated repository content sanity. +# This script looks at frontmatter of each file mostly checks that: +# - lang: key is present and matches the directory name +# - ref: key is present +# - url-related parameters (redirect_from, permalink) are properly language-scoped +# - no unexpected entries in the frontmatter are present +# +# Besides that, verifies if no unexpected files are present. +# +# Usage: +# $0 +# Supported env variables: +# - TRANSLATED_LANGS - list of expected languages + +import argparse +import os +import string +import yaml + +TRANSLATED_LANGS = ['de'] +if 'TRANSLATED_LANGS' in os.environ: + TRANSLATED_LANGS = os.environ['TRANSLATED_LANGS'].split() + +SAFE_PATH_CHARS = string.ascii_letters + string.digits + '/_-.' 
+ +ALLOWED_FRONTMATTER_KEYS = ['title', 'lang', 'ref', 'permalink', 'redirect_from', 'redirect_to', 'layout', 'model'] + +MANDATORY_FRONTMATTER_KEYS = ['title', 'lang', 'ref', 'layout'] + +ALLOWED_EXTERNAL_REDIRECT_TARGETS = ['https://github.com/Qubes-Community/Contents/blob/master/'] + +ALLOWED_LAYOUTS = ['doc', 'doc-index', 'doc-full', 'hcl', 'downloads', 'news', 'team', 'home', 'default', 'experts', 'sidebar'] + +#TODO: consider allowing some layouts only on some pages (and consider their redirect_from too) + +parser = argparse.ArgumentParser() +parser.add_argument('directory') + +class VerificationError(Exception): + def __init__(self, path, msg): + safe_name = ''.join(l if l in SAFE_PATH_CHARS else '?' for l in path) + super().__init__('{}: {}'.format(path, msg)) + +def verify_readme(path): + with open(path) as f: + readme_text = f.read() + + if '---' in readme_text: + raise VerificationError(path, 'may not contain frontmatter') + if '<' in readme_text: + raise VerificationError(path, 'may not contain HTML') + if '{' in readme_text: + raise VerificationError(path, 'may not contain liquid templates') + + +def verify_md_file(lang, path): + with open(path) as f: + file_content = f.read() + + # there must be frontmatter + if not file_content.startswith('---\n'): + raise VerificationError(path, 'missing frontmatter') + + # better be more strict - may catch too much (if another separator is + # used), but then loading yaml will detect multiple documents + frontmatter_text = file_content[4:].split('\n---\n')[0] + # there could be _just_ frontmatter too + if frontmatter_text.endswith('\n---'): + frontmatter_text = frontmatter_text[:-4] + try: + frontmatter = yaml.safe_load(frontmatter_text) + except Exception as e: + raise VerificationError(path, 'failed to parse frontmatter: {!s}'.format(e)) from e + verify_frontmatter(lang, path, frontmatter) + + +def verify_frontmatter(lang, path, frontmatter): + # double check if all entries were verified + verified = [] + + 
url_prefix = '/{}/'.format(lang) + if any(key not in ALLOWED_FRONTMATTER_KEYS for key in frontmatter): + raise VerificationError(path, 'unexpected frontmatter key') + + for key in MANDATORY_FRONTMATTER_KEYS: + if key not in frontmatter: + raise VerificationError(path, key + ' missing in frontmatter') + + if lang != frontmatter['lang']: + raise VerificationError(path, 'lang mismatch') + + verified.append('lang') + + if not isinstance(frontmatter['ref'], int): + raise VerificationError(path, 'invalid ref format') + + verified.append('ref') + + if 'permalink' in frontmatter: + if not frontmatter['permalink'].startswith(url_prefix): + raise VerificationError(path, 'invalid permalink') + + verified.append('permalink') + + if 'redirect_from' in frontmatter: + if isinstance(frontmatter['redirect_from'], str): + redirect_from = [frontmatter['redirect_from']] + elif isinstance(frontmatter['redirect_from'], list): + redirect_from = frontmatter['redirect_from'] + else: + raise VerificationError(path, 'invalid redirect_from format') + + for url in redirect_from: + if '/..' in url: + raise VerificationError(path, '.. in url') + if not url.startswith(url_prefix): + raise VerificationError(path, 'invalid redirect_from') + + verified.append('redirect_from') + + if 'redirect_to' in frontmatter: + url = frontmatter['redirect_to'] + if isinstance(url, list): + if len(url) != 1: + raise VerificationError(path, 'if redirect_to is a list, must be 1-element') + url = url[0] + if not isinstance(url, str): + raise VerificationError(path, 'invalid redirect_to format') + if '/..' in url: + raise VerificationError(path, '.. 
in redirect_to') + if not any(url.startswith(prefix) for prefix in ALLOWED_EXTERNAL_REDIRECT_TARGETS + [url_prefix]): + raise VerificationError(path, 'forbidden redirect_to target') + + verified.append('redirect_to') + + if frontmatter['layout'] not in ALLOWED_LAYOUTS: + raise VerificationError(path, 'forbidden layout') + + verified.append('layout') + + title = frontmatter['title'] + if not isinstance(title, str): + raise VerificationError(path, 'invalid title format') + + # avoid HTML in title + if '<' in title or '%' in title: + raise VerificationError(path, 'invalid character in title') + + verified.append('title') + + # if 'model' is present, must have 'all' value + if 'model' in frontmatter: + if frontmatter['model'] != 'all': + raise VerificationError(path, 'invalid model value') + verified.append('model') + + # intentionally compare lists, not sets, to catch duplicates too + if sorted(verified) != sorted(frontmatter): + raise VerificationError(path, 'BUG, some frontmatter entries were not verified') + + # all is ok + + +def verify_lang(lang, path): + for dirpath, dirnames, filenames in os.walk(path): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + if any(c not in SAFE_PATH_CHARS for c in filename): + raise VerificationError(filepath, 'unsafe characters in filename') + + if filename.endswith('.md'): + verify_md_file(lang, filepath) + elif filename.endswith('.html'): + # the frontmatter is expected the same + verify_md_file(lang, filepath) + elif filename.endswith('.yml'): + # those are loaded scoped anyway, so can mess only own language + pass + else: + raise VerificationError(filepath, 'unexpected file type') + + + +def main(): + args = parser.parse_args() + + for lang in os.listdir(args.directory): + if lang == '.git': + pass + elif lang == 'README.md': + verify_readme(os.path.join(args.directory, lang)) + elif lang in TRANSLATED_LANGS: + verify_lang(lang, os.path.join(args.directory, lang)) + else: + raise 
VerificationError(lang, 'unexpected language dir') + + +if __name__ == '__main__': + main() From d576bb8a339ae2d58de22c03163e48605c9597d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 31 Mar 2021 20:25:53 +0200 Subject: [PATCH 10/19] Add info on Transifex integration to the README --- README.md | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/README.md b/README.md index 8d326fc6..10751e9d 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,68 @@ Please carefully read these guidelines before submitting a pull request. - [jQuery 1.7](http://api.jquery.com) - javascript helper library - [jQuery ToC MD Generator](https://github.com/dafi/tocmd-generator) - renders header menu on documentation section +Translation +------------ + +Documentation translation is done using the Transifex platform: https://www.transifex.com/otf/qubes/ +The `_translated` directory should not be modified manually. Any manual change +there **will be overridden** with the content downloaded from Transifex. + +The `qubes-translated` repository is not signed and generally should not be +considered trusted for sensitive tasks. But the specific commit referenced from +this repository is validated to not interfere with the English website. + +### Transifex integration details ### + +Most of the integration is automated. It is split into a few parts: + +1. `_utils/transifex-push` script takes the source (English) content and + uploads it to Transifex. The platform merges existing translations into the new + files, so unchanged parts that were translated before remain translated. + Transifex configuration is created from scratch here, to correctly handle + new/removed files. + +2. `_utils/transifex-pull` pulls translated content and places it into
Then a set of scripts in + `_utils/_translation_utils` perform various post processing steps, + including: + - validate syntax of retrieved files (if frontmatter is still correctly set etc) + - modify frontmatter settings (`permalink`, `lang`, `redirect_from` etc) to + match the page language + - adjust all internal links to point at pages in the same language + - run htmlproofer to verify if no broken links were introduced + + At the end, the script commit and push the new content to qubes-translated + repository. + +3. `_utils/update-translated` fetches new version of qubes-translated repo (its + master branch), verifies if any page doesn't try to subvert English version, + and if all is fine, makes a commit and push updated submodule (similar to + `_utils/update-submodules` script). + +The points 1 and 2 are running in Gitlab CI environment, without access to any +signing key and with push access only to qubes-translated repository. The third +point is running in a more trusted environment, with access to signing key and +push access to the main repository. + +### Language switcher ### + +The top level `_config.yml` file contains list of languages to be enabled. If +there is more than one (`en`), each page will have a language switch menu in +the top right corner. Only languages listed in `_config.yml` are visible in the +switcher, but there may be more available (accessing them require manually +changing language code in the URL). + +Each markdown file in the repo has `lang` and `ref` attributes (in its +frontmatter). `lang` attribute contains the language of this file +(should always be `en` outside of qubes-translated repository) and `ref` +contains a unique identifier of that page. Language switcher logic uses the +`ref` attribute to find all translations of given page. This allows translated +page to have different page name in URL, although we do not do this right now. 
+ +`lang` and `ref` attributes are added with +`_utils/_translation_utils/prepare_for_translation.py` script. + Deprecated Documentation ------------------------ From 9a489d6a3fa085784125f426bc148bab4504f3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 31 Mar 2021 19:49:17 +0200 Subject: [PATCH 11/19] autoupdate: _translated --- _translated | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_translated b/_translated index d3b9b03c..03e12c91 160000 --- a/_translated +++ b/_translated @@ -1 +1 @@ -Subproject commit d3b9b03c1e480a9a53ffd46309c711a4dc0b6f08 +Subproject commit 03e12c911da1c0a122db5373fddf11e37c282d3f From 1224747082733cd80be5ace46206cb380a2d698e Mon Sep 17 00:00:00 2001 From: Tobias Killer Date: Thu, 1 Apr 2021 05:32:01 +0200 Subject: [PATCH 12/19] Add `Translate This Page` button (rebased onto newer doc-widget.html by @marmarek) --- _data/includes.yml | 2 ++ _includes/doc-widget.html | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/_data/includes.yml b/_data/includes.yml index bce18dbc..bfc71c79 100644 --- a/_data/includes.yml +++ b/_data/includes.yml @@ -11,6 +11,8 @@ icon: fa-code - text: Edit This Page icon: fa-code-fork + - text: Translate This Page + icon: fa-language - url: /security/ text: Report a Security Issue icon: fa-lock diff --git a/_includes/doc-widget.html b/_includes/doc-widget.html index 3bef12ae..c605a2a2 100644 --- a/_includes/doc-widget.html +++ b/_includes/doc-widget.html @@ -5,9 +5,14 @@ {% if page.lang == nil or page.lang == "en" %} {% assign master_edit = site.project_repo_path | append: "/qubes-doc/edit/master" | append: page.relative_path | remove_first: '_doc' %} {% assign master_blob = site.project_repo_path | append: "/qubes-doc/blob/master" | append: page.relative_path | remove_first: '_doc' %} + {% assign pagelang = "" %} + {% assign transifexresource = page.relative_path | replace: '_doc/', 'doc/' | remove: ".md" | replace: "/", "_" %} 
{% else %} {% assign master_edit = site.project_repo_path | append: "/qubes-translated/edit/master" | append: page.relative_path | remove_first: '_translated' %} - {% assign master_blob = site.project_repo_path | append: "/qubes-translated/blob/master" | append: page.relative_path| remove_first: '_translated' %} + {% assign master_blob = site.project_repo_path | append: "/qubes-translated/blob/master" | append: page.relative_path | remove_first: '_translated' %} + {% assign pagelang = page.lang %} + {% assign prefix = "_translated/" | append: pagelang | append: '/' %} + {% assign transifexresource = page.relative_path | remove_first: prefix | replace: '_doc/', 'doc/' | remove: ".md" | replace: "/", "_" %} {% endif %} {% for item in docs.links %} @@ -20,6 +25,8 @@ {% assign a_href = master_blob %} {% elsif item.icon == "fa-code-fork" %} {% assign a_href = master_edit %} + {% elsif item.icon == "fa-language" %} + {% assign a_href = "https://www.transifex.com/otf/qubes/translate/#" | append: pagelang | append: "/" | append: transifexresource %} {% else %} {% assign a_href = item.url %} {% endif %} From 6a2d844af3e40ef21c18e28cbaf5471c1c03ed81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 1 Apr 2021 05:39:59 +0200 Subject: [PATCH 13/19] Skip "Edit This Page" button on translated pages qubes-translated repo should not be modified directly. 
--- _includes/doc-widget.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/_includes/doc-widget.html b/_includes/doc-widget.html index c605a2a2..f45dc741 100644 --- a/_includes/doc-widget.html +++ b/_includes/doc-widget.html @@ -24,6 +24,10 @@ {% if item.icon == "fa-code" %} {% assign a_href = master_blob %} {% elsif item.icon == "fa-code-fork" %} + {% if lang != "" %} + + {% continue %} + {% endif %} {% assign a_href = master_edit %} {% elsif item.icon == "fa-language" %} {% assign a_href = "https://www.transifex.com/otf/qubes/translate/#" | append: pagelang | append: "/" | append: transifexresource %} From dbff3b4ef65e1fae0a557ebb735b39f264f8d95c Mon Sep 17 00:00:00 2001 From: Tobias Killer Date: Sun, 20 Jun 2021 04:38:37 +0200 Subject: [PATCH 14/19] Update merge_md_heading_ids.rb script --- .../merge_md_heading_ids.rb | 95 ++++++++++++------- 1 file changed, 63 insertions(+), 32 deletions(-) diff --git a/_utils/_translation_utils/merge_md_heading_ids.rb b/_utils/_translation_utils/merge_md_heading_ids.rb index c85f7933..1f59df76 100644 --- a/_utils/_translation_utils/merge_md_heading_ids.rb +++ b/_utils/_translation_utils/merge_md_heading_ids.rb @@ -94,10 +94,21 @@ def extract_headline_id(rendered_html_lines, l, c) -def try_create_id(gfm_lines, line_number, this_line, next_line, rendered_html_lines, placeholder) +def try_get_headline_column_and_line(gfm_lines, line_number, placeholder) # save headline saved_headline = gfm_lines[line_number] + this_line = gfm_lines[line_number].to_s + if this_line.eql? 
"" + return nil + end + + if line_number < gfm_lines.length - 1 + next_line = gfm_lines[line_number + 1].to_s + else + next_line = "" + end + hl = nil if this_line.start_with?('#') @@ -113,11 +124,7 @@ def try_create_id(gfm_lines, line_number, this_line, next_line, rendered_html_li # revert headline gfm_lines[line_number] = saved_headline - if hl == nil - return nil - end - - return extract_headline_id(rendered_html_lines, hl.l, hl.c) + return hl end @@ -147,8 +154,8 @@ def generate_unique_placeholder(rendered_html_lines) -def create_line_to_id_map(gfm_lines) - result = {} +def create_id_list(gfm_lines) + result = [] gfm_lines2 = gfm_lines[0..-1] rendered_html_lines = render(gfm_lines) @@ -156,15 +163,11 @@ def create_line_to_id_map(gfm_lines) # line-by-line: assume a headline n = gfm_lines2.length - for i in 0..(n - 1) - this_line = gfm_lines2[i] - next_line = '' - if i < n - 1 - next_line = gfm_lines2[i + 1] - end - hid = try_create_id(gfm_lines2, i, this_line, next_line, rendered_html_lines, placeholder) - if hid != nil - result[i] = hid + for line_number in 0..(n - 1) + hl = try_get_headline_column_and_line(gfm_lines2, line_number, placeholder) + if hl != nil + hid = extract_headline_id(rendered_html_lines, hl.l, hl.c) + result = result + [hid] end end return result @@ -172,22 +175,44 @@ def create_line_to_id_map(gfm_lines) -def insert_ids_to_gfm_file(line_to_id_map, gfm_lines) +def is_a_headline(gfm_lines, line_number, placeholder) + return try_get_headline_column_and_line(gfm_lines, line_number, placeholder) != nil +end + + + +def insert_ids_into_gfm_file(id_list, gfm_lines) result = gfm_lines[0..-1] + if id_list.length == 0 + return result + end n = result.length - line_to_id_map.each do |key, value| - str_to_insert = '' + "\n" - line = result[key] - if !line.nil? 
and line.start_with?('#') - if key + 1 >= n - result = result + [''] + rendered_html_lines = render(gfm_lines) + placeholder = generate_unique_placeholder(rendered_html_lines) + id_index = 0 + + for line_number in 0..(gfm_lines.length - 1) + if is_a_headline(gfm_lines, line_number, placeholder) + id = id_list[id_index] + if id != nil + str_to_insert = '' + "\n" + line = result[line_number] + if !line.nil? and line.start_with?('#') + if line_number + 1 >= n + result = result + [''] + end + result[line_number + 1] = str_to_insert.to_s + result[line_number + 1].to_s + else + if line_number + 2 >= n + result = result + [''] + end + result[line_number + 2] = str_to_insert.to_s + result[line_number + 2].to_s + end end - result[key + 1] = str_to_insert.to_s + result[key + 1].to_s - else - if key + 2 >= n - result = result + [''] + id_index += 1 + if id_index >= id_list.length + break end - result[key + 2] = str_to_insert.to_s + result[key + 2].to_s end end return result @@ -216,11 +241,11 @@ def merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines) # get body from orig orig_body = orig_gfm_lines[orig_end..-1] - # create line-to-id map - orig_line_to_id_map = create_line_to_id_map(orig_body) + # create id list + orig_id_list = create_id_list(orig_body) # insert ids - preresult = insert_ids_to_gfm_file(orig_line_to_id_map, trl_body) + preresult = insert_ids_into_gfm_file(orig_id_list, trl_body) # create translated document with adapted body result_trl_gfm = trl_yaml_front_matter.join + preresult.join @@ -300,5 +325,11 @@ def main() if __FILE__ == $0 main() + + # --- for debugging + # orig_gfm_lines = read_file(ARGV[0]) + # trl_gfm_lines = read_file(ARGV[1]) + # result = merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines) + # write_file(result, '/dev/stdout') end From 4da83d1bd83d5d8a016b6f4ddb3f57eab0d99477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 22 Jun 2021 23:25:44 +0200 Subject: [PATCH 15/19] Make Q logo link to the main 
page in the current language --- _includes/header.html | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/_includes/header.html b/_includes/header.html index a19c4918..90427c55 100644 --- a/_includes/header.html +++ b/_includes/header.html @@ -34,7 +34,12 @@
{% endif %} - + {% if page.lang == nil or page.lang == "" or page.lang == "en" %} + {% assign linktohome = "/" %} + {% else %} + {% assign linktohome = "/" | append: page.lang | append: "/" %} + {% endif %} + Qubes OS Project Qubes OS From 84867649bb015d1daa46dcdee0746e495c974d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 23 Jun 2021 00:33:26 +0200 Subject: [PATCH 16/19] translation: remove old translations Remove translated files when the source file get removed. This includes also renaming files. --- .../_translation_utils/post_transifex_pull.sh | 2 + .../remove_obsolete_files.py | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 _utils/_translation_utils/remove_obsolete_files.py diff --git a/_utils/_translation_utils/post_transifex_pull.sh b/_utils/_translation_utils/post_transifex_pull.sh index 23676533..fc8cd2d9 100644 --- a/_utils/_translation_utils/post_transifex_pull.sh +++ b/_utils/_translation_utils/post_transifex_pull.sh @@ -10,6 +10,8 @@ echo "============================ post processing step 1 ====================== #read b bash _utils/_translation_utils/prepare_tx_config_postprocess.sh .tx/config /tmp/tx-mapping +echo "============================ remove obsolete files =======================================" +python3 _utils/_translation_utils/remove_obsolete_files.py "$1" "$2" /tmp/tx-mapping echo "============================ post processing step 2 ======================================" #read b diff --git a/_utils/_translation_utils/remove_obsolete_files.py b/_utils/_translation_utils/remove_obsolete_files.py new file mode 100644 index 00000000..44743f8f --- /dev/null +++ b/_utils/_translation_utils/remove_obsolete_files.py @@ -0,0 +1,51 @@ +#!/usr/bin/python3 + +import argparse +import os +import sys + +parser = argparse.ArgumentParser() +parser.add_argument('lang') +parser.add_argument('translation_dir') +parser.add_argument('tx_mapping') + +def main(): + """Remove translated files that are no longer listed in the tx mapping. + + Reads every 'file_filter = ...' entry from args.tx_mapping, substitutes + args.lang for the '<lang>' placeholder, and unlinks each file found under + args.translation_dir whose path is not among those entries. + + Returns 1 without deleting anything when the mapping yields no entries or + the directory contains no files; otherwise returns None (exit status 0). + """ + args = 
parser.parse_args() + + valid_files = set() + with open(args.tx_mapping) as f_mapping: + for line in f_mapping: + if line.startswith('file_filter = '): + # Split only once and normalize, so the entry compares equal to + # the normalized os.walk() paths collected below. + valid_files.add(os.path.normpath(line.strip().split(' = ', 1)[1].replace('<lang>', args.lang))) + + if not valid_files: + print('No files found in {}, aborting!'.format(args.tx_mapping)) + return 1 + + existing_files = set() + for dirpath, dirs, files in os.walk(args.translation_dir): + existing_files.update(os.path.normpath(os.path.join(dirpath, name)) for name in files) + + if not existing_files: + print('No files found in {}, aborting!'.format(args.translation_dir)) + return 1 + + for obsolete in existing_files.difference(valid_files): + print('Removing {}'.format(obsolete)) + os.unlink(obsolete) + + +if __name__ == '__main__': + sys.exit(main()) + From 94c246a3aef6147d812668e0e06512e124c6b610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 24 Jun 2021 04:39:04 +0200 Subject: [PATCH 17/19] Clarify variable name in lang switcher --- _includes/header.html | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/_includes/header.html b/_includes/header.html index 90427c55..37e3521b 100644 --- a/_includes/header.html +++ b/_includes/header.html @@ -21,14 +21,15 @@ {% assign langmenu = false %} {% endif %} {% if langmenu %} - {% assign posts = site.pages | concat: site.doc | concat: site.translated | where:'ref', page.ref | sort: 'lang' %} + + {% assign page_all_langs = site.pages | concat: site.doc | concat: site.translated | where:'ref', page.ref | sort: 'lang' %}
- {% for post in posts %} - {% if site.languages contains post.lang %} - {{ post.lang }} + {% for translated in page_all_langs %} + {% if site.languages contains translated.lang %} + {{ translated.lang }} {% endif %} {% endfor %}
@@ -57,8 +58,8 @@
- {% for post in posts %} - {{ post.lang }} + {% for translated in page_all_langs %} + {{ translated.lang }} {% endfor %}
From 81147a6e435cc8ba808908f6a31443bb01da7a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 24 Jun 2021 05:00:01 +0200 Subject: [PATCH 18/19] translation: Move site verification after downloading all languages Do not attempt to verify correctness of the website before refreshing all languages. If any structural change happened (rename, permalink change etc), it will fail because of the old files, not the freshly downloaded ones. Make htmlproofer postprocessing script language agnostic. This is mostly about removing "language" parameter, which wasn't used anyway. --- _utils/_translation_utils/check_all_langs.sh | 29 +++++++++++++++++++ .../_translation_utils/post_transifex_pull.sh | 23 --------------- .../postprocess_htmlproofer.py | 21 ++++---------- _utils/transifex-pull | 2 ++ 4 files changed, 37 insertions(+), 38 deletions(-) create mode 100644 _utils/_translation_utils/check_all_langs.sh diff --git a/_utils/_translation_utils/check_all_langs.sh b/_utils/_translation_utils/check_all_langs.sh new file mode 100644 index 00000000..3755042c --- /dev/null +++ b/_utils/_translation_utils/check_all_langs.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# to be run from the git root +# $1 is directory where translated files reside and language needs to be added to internal urls +# TODO param check + +set -e + +echo "================================= build site ==================================" +#read b +bundle exec jekyll b + +all_ok=true +echo "================================= run htmlproofer ===============================" +# capture both stdout and stderr; postprocess_htmlproofer.py is given this file below +htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/.*/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug > /tmp/html.output 2>&1 || all_ok=false + +# exit here if all is ok +if $all_ok; then + echo 'All checks passed!' 
+ exit +fi + +echo "================================== as a last resort in case of errors process html proofer errors =================================" +python3 _utils/_translation_utils/postprocess_htmlproofer.py /tmp/html.output "$1" + +echo "================================= build the site and run htmlproofer ====================================" +rm -rf ./_site/ +bundle exec jekyll b +htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/.*/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug diff --git a/_utils/_translation_utils/post_transifex_pull.sh b/_utils/_translation_utils/post_transifex_pull.sh index fc8cd2d9..9e6f6f56 100644 --- a/_utils/_translation_utils/post_transifex_pull.sh +++ b/_utils/_translation_utils/post_transifex_pull.sh @@ -25,26 +25,3 @@ python3 _utils/_translation_utils/postprocess_translation.py "$1" "$2" /tmp/tx- echo "============================ post processing step 4 press to cont ======================================" #read b bash _utils/_translation_utils/postprocess_translation.sh "$1" "$2" /tmp/translated_href_urls.txt - - -echo "================================= build site ==================================" -#read b -bundle exec jekyll b - -all_ok=true -echo "================================= run htmlproofer ===============================" -htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/$1/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug 2&> /tmp/html.output || all_ok=false - -# exit here if all is ok -if $all_ok; then - echo 'All checks passed!' 
- exit -fi - -echo "================================== as a last resort in case of errors process html proofer errors =================================" -python3 _utils/_translation_utils/postprocess_htmlproofer.py "$1" /tmp/html.output "$2" - -echo "================================= build the site and run htmlproofer ====================================" -rm -rf ./_site/ -bundle exec jekyll b -htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/$1/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug diff --git a/_utils/_translation_utils/postprocess_htmlproofer.py b/_utils/_translation_utils/postprocess_htmlproofer.py index eb6a587b..e73d98d9 100644 --- a/_utils/_translation_utils/postprocess_htmlproofer.py +++ b/_utils/_translation_utils/postprocess_htmlproofer.py @@ -1,8 +1,7 @@ #!/usr/bin/python3 ''' -python _utils/_translation_utils/postprocess_htmlproofer.py +python _utils/_translation_utils/postprocess_htmlproofer.py invoke: python _utils/_translation_utils/postprocess_htmlproofer.py de /tmp/html.output _translated/de/ -[de]: translation language [/tmp/html.output]: output from htmlproofer [_translated/de/]: the directory with the downloaded translated files from transifex ''' @@ -123,12 +122,11 @@ def process_markdown(translated_file, internal_links): -def get_all_translated_permalinks_and_redirects_to_file_mapping(translated_dir, lang): +def get_all_translated_permalinks_and_redirects_to_file_mapping(translated_dir): """ - traverse the already updated (via tx pull) root directory with all the translated files for a specific language - and get their permalinks and redirects without the specific language - translated_dir: root directory with all the translated files for a specific language - lang: the specific language + traverse the already updated (via tx pull) root directory with all the translated files + and get their permalinks and redirects + translated_dir: root 
directory with all the translated files return: set holding the translated permalinks and redirects """ mapping = {} @@ -246,8 +244,6 @@ def process_yml(translated, errorlinks): if __name__ == '__main__': # python _utils/_translation_utils/postprocess_htmlproofer.py de /tmp/html.output _translated/de/ parser = ArgumentParser() - # for which language should we do this - parser.add_argument("language") # the file containing the output of htmlproofer parser.add_argument("htmlproofer_output") # the directory containing the translated (downloaded via tx pull) files @@ -262,11 +258,6 @@ def process_yml(translated, errorlinks): logger.error("please check your translated directory") exit(1) - if not args.language in TRANSLATED_LANGS: - print("language not in the expected translation languages") - logger.error("please check your translation language") - exit(1) - if not isfile(args.htmlproofer_output): print("please check your html proofer output file") logger.error("please check your html proofer output file") @@ -293,7 +284,7 @@ def process_yml(translated, errorlinks): logger.debug("------------------------------------------------") logger.debug("------------------------------------------------") - mapping, yml_files = get_all_translated_permalinks_and_redirects_to_file_mapping(args.translated_dir, args.language) + mapping, yml_files = get_all_translated_permalinks_and_redirects_to_file_mapping(args.translated_dir) log_debug('mapping ', mapping) diff --git a/_utils/transifex-pull b/_utils/transifex-pull index 6c149cb1..282262c6 100755 --- a/_utils/transifex-pull +++ b/_utils/transifex-pull @@ -23,6 +23,8 @@ for lang in "$@"; do bash _utils/_translation_utils/post_transifex_pull.sh "$lang" _translated/"$lang" done +bash _utils/_translation_utils/check_all_langs.sh _translated + # switch to ssh for push git -C _translated remote set-url origin git@github.com:QubesOS/qubes-translated From 67520a6ec02f57a7a2e882efb2633b10e9211b2c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 31 Mar 2021 19:50:19 +0200 Subject: [PATCH 19/19] [DO NOT MERGE] enable languages: de fr es --- _config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/_config.yml b/_config.yml index 0de5e68a..4d5640b0 100644 --- a/_config.yml +++ b/_config.yml @@ -25,6 +25,9 @@ excerpt_separator: "" # remember to create symlinks in _data/translation too languages: - en +- de +- fr +- es plugins: - jekyll-redirect-from