diff --git a/_includes/team.html b/_includes/team.html
index 06c3fef8..c3fea148 100644
--- a/_includes/team.html
+++ b/_includes/team.html
@@ -6,7 +6,7 @@
{% assign emeritus = team-page | where_exp: "item", "item.htmlsection == 'emeritus'" | first %}
{% assign community = team-page | where_exp: "item", "item.htmlsection == 'community'" | first %}
{% assign team_link = lang | append: "/team/" %}
-{% assign teams = site.pages | concat: site.qubes-translated | where:'permalink', team_link %}
+{% assign teams = site.pages | concat: site.translated | where:'permalink', team_link %}
{% if teams.size == 0 %}
{% assign team_link = "/team/" %}
{% endif %}
diff --git a/_translated b/_translated
new file mode 160000
index 00000000..03e12c91
--- /dev/null
+++ b/_translated
@@ -0,0 +1 @@
+Subproject commit 03e12c911da1c0a122db5373fddf11e37c282d3f
diff --git a/_utils/_translation_utils/COUNTER.txt b/_utils/_translation_utils/COUNTER.txt
new file mode 100644
index 00000000..f37b177f
--- /dev/null
+++ b/_utils/_translation_utils/COUNTER.txt
@@ -0,0 +1 @@
+current counter: 251
diff --git a/_utils/_translation_utils/check_all_langs.sh b/_utils/_translation_utils/check_all_langs.sh
new file mode 100644
index 00000000..3755042c
--- /dev/null
+++ b/_utils/_translation_utils/check_all_langs.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# to be run from the git root
+# $1 is the directory where the translated files reside and where the language needs to be added to internal urls
+# TODO param check
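+# example invocation (assuming German translations live in _translated/de/):
+#   bash _utils/_translation_utils/check_all_langs.sh _translated/de/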
+
+set -e
+
+echo "================================= build site =================================="
+#read b
+bundle exec jekyll b
+
+all_ok=true
+echo "================================= run htmlproofer ==============================="
+htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/.*/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug &> /tmp/html.output || all_ok=false
+
+# exit here if all is ok
+if $all_ok; then
+ echo 'All checks passed!'
+ exit
+fi
+
+echo "================================== as a last resort in case of errors process html proofer errors ================================="
+python3 _utils/_translation_utils/postprocess_htmlproofer.py /tmp/html.output "$1"
+
+echo "================================= build the site and run htmlproofer ===================================="
+rm -rf ./_site/
+bundle exec jekyll b
+htmlproofer ./_site --disable-external --checks-to-ignore ImageCheck --file-ignore "./_site/video-tours/index.html,./_site/.*/video-tours/index.html" --url-ignore "/qubes-issues/" --log-level debug
diff --git a/_utils/_translation_utils/merge_md_heading_ids.py b/_utils/_translation_utils/merge_md_heading_ids.py
new file mode 100644
index 00000000..df82153b
--- /dev/null
+++ b/_utils/_translation_utils/merge_md_heading_ids.py
@@ -0,0 +1,213 @@
+#!/usr/bin/python3
+# This is a script provided by TokiDev
+# https://github.com/tokideveloper/langswitch-prototype/blob/master/_utils/merge_md_heading_ids.py
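+#
+# It merges the heading ids that kramdown generates for the original (English)
+# markdown files into the translated markdown files (as anchor elements), so
+# that heading anchors stay identical across languages.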
+
+import sys
+import re
+import subprocess
+
+
+def get_yaml_front_matter(gfm_lines):
+ counter = 0
+ start = 0
+ end = 0
+ for i in range(len(gfm_lines)):
+ if gfm_lines[i] == '---\n':
+ counter += 1
+ if counter == 1:
+ start = i
+ elif counter == 2:
+ end = i + 1
+ return gfm_lines[start:end], start, end
+ if counter == 1:
+ return gfm_lines[start:], start, len(gfm_lines)
+ # case counter == 0:
+ return [], 0, 0
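+# example: for ['---\n', 'lang: en\n', '---\n', 'body\n'] this returns
+# (['---\n', 'lang: en\n', '---\n'], 0, 3); without any front matter it returns ([], 0, 0)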
+
+
+
+def line_only_made_of(line, char):
+ length = len(line)
+ for i in range(length - 1):
+ if line[i] != char:
+ return False
+ return line[length - 1] == '\n'
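+# e.g. line_only_made_of('====\n', '=') is True; the trailing newline is required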
+
+
+
+def render(gfm_lines):
+ p = subprocess.run(['kramdown'], stdout=subprocess.PIPE, input=''.join(gfm_lines), encoding='utf8')
+ if p.returncode != 0:
+ return None
+ return p.stdout.splitlines(1)
+
+
+
+def look_for_headline(rendered_html_lines, headline_id):
+ for l in range(len(rendered_html_lines)):
+        x = re.search('<h. id="' + headline_id + '">', rendered_html_lines[l])
+ if x is None:
+ continue
+ c = x.start()
+ if c is None:
+ continue
+ else:
+ return l, c
+ return None
+
+
+
+def extract_headline_id(rendered_html_lines, l, c):
+ line = rendered_html_lines[l]
+ line = line[c:]
+    x = re.search('<h. id="', line)
+    if x is None or x.start() > 0:
+        return None
+ span = x.span()
+ line = line[(span[1] - span[0]):]
+ end = line.find('"')
+ line = line[:end]
+ return line
+
+
+
+def try_create_id(gfm_lines, line_number, this_line, next_line, rendered_html_lines, placeholder):
+ # save headline
+ saved_headline = gfm_lines[line_number]
+
+ hl = None
+
+ if this_line.startswith('#'):
+ # headline starting with '#'
+ gfm_lines[line_number] = '# ' + placeholder + '\n'
+ hl = look_for_headline(render(gfm_lines), placeholder)
+ elif len(next_line) >= 3 and (line_only_made_of(next_line, '=') or line_only_made_of(next_line, '-')):
+ # headline starting with '===' or '---'
+ gfm_lines[line_number] = placeholder + '\n'
+ hl = look_for_headline(render(gfm_lines), placeholder)
+
+ # revert headline
+ gfm_lines[line_number] = saved_headline
+
+ if hl is None:
+ return None
+
+ hl_line, hl_col = hl
+ return extract_headline_id(rendered_html_lines, hl_line, hl_col)
+
+
+
+def generate_unique_placeholder(rendered_html_lines):
+ number = 0
+ PREFIX = 'xq'
+ SUFFIX = 'z'
+ result = ''
+ while True:
+ result = PREFIX + str(number) + SUFFIX
+ solution_found = True
+ for line in rendered_html_lines:
+ if result in line:
+ number += 1
+ solution_found = False
+ break
+ if solution_found:
+ break
+ # we assume that there will be at least one solution
+ return result
+
+
+
+def create_line_to_id_map(gfm_lines):
+ result = {}
+ gfm_lines2 = gfm_lines[:]
+ rendered_html_lines = render(gfm_lines)
+
+ placeholder = generate_unique_placeholder(rendered_html_lines)
+
+ # line-by-line: assume a headline
+ n = len(gfm_lines2)
+ for i in range(n):
+ this_line = gfm_lines2[i]
+ next_line = ''
+ if i < n - 1:
+ next_line = gfm_lines2[i + 1]
+ hid = try_create_id(gfm_lines2, i, this_line, next_line, rendered_html_lines, placeholder)
+ if hid is not None:
+ result[i] = hid
+
+ return result
+
+
+
+def insert_ids_to_gfm_file(line_to_id_map, gfm_lines):
+ result = gfm_lines[:]
+ n = len(result)
+ for key, value in line_to_id_map.items():
+        str_to_insert = '<a id="' + value + '"></a>\n'
+ line = result[key]
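+        # ATX headings ('# ...') occupy one line, so the anchor is prepended to the
+        # following line; setext headings ('Title' + '=====') occupy two lines, so
+        # the anchor goes one line further down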
+ if line.startswith('#'):
+ if key + 1 >= n:
+ result = result + ['']
+ result[key + 1] = str_to_insert + result[key + 1]
+ else:
+ if key + 2 >= n:
+ result = result + ['']
+ result[key + 2] = str_to_insert + result[key + 2]
+ return result
+
+
+
+def merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines):
+ # assuming that both files match line by line such that matching headlines are in the same lines
+
+ # get yaml front matter from orig
+ orig_yaml_front_matter, orig_start, orig_end = get_yaml_front_matter(orig_gfm_lines)
+
+ # get yaml front matter from trl
+ trl_yaml_front_matter, trl_start, trl_end = get_yaml_front_matter(trl_gfm_lines)
+
+ # get body from trl
+ trl_body = trl_gfm_lines[trl_end:]
+
+ # get body from orig
+ orig_body = orig_gfm_lines[orig_end:]
+
+ # create line-to-id map
+ orig_line_to_id_map = create_line_to_id_map(orig_body)
+
+ # insert ids
+ preresult = insert_ids_to_gfm_file(orig_line_to_id_map, trl_body)
+
+ # create translated document with adapted body
+ result_trl_gfm = ''.join(trl_yaml_front_matter) + ''.join(preresult)
+
+ return result_trl_gfm
+
+
+def write_lines(content, filename):
+ with open(filename,'w') as f:
+ f.write(content)
+
+def read_lines(filename):
+ with open(filename, 'r') as f:
+ lines = f.readlines()
+ return lines
+
+def process_headers(mapping):
+
+ for key, item in mapping.items():
+ if not item.endswith('.yml'):
+ original_lines = read_lines(key)
+ translated_lines = read_lines(item)
+ # merge ids in gfm files
+ print(key)
+
+ result = merge_ids_in_gfm_files(original_lines, translated_lines)
+ write_lines(result, item)
+
+
diff --git a/_utils/_translation_utils/merge_md_heading_ids.rb b/_utils/_translation_utils/merge_md_heading_ids.rb
new file mode 100644
index 00000000..1f59df76
--- /dev/null
+++ b/_utils/_translation_utils/merge_md_heading_ids.rb
@@ -0,0 +1,335 @@
+#!/usr/bin/env ruby
+
+require 'kramdown'
+
+
+
+YamlFrontMatter = Struct.new(:yaml_lines, :startl, :endl)
+
+def get_yaml_front_matter(gfm_lines)
+ counter = 0
+ startl = 0
+ endl = 0
+ for i in 0..(gfm_lines.length - 1)
+ if gfm_lines[i] == "---\n"
+ counter += 1
+ if counter == 1
+ startl = i
+ elsif counter == 2
+ endl = i + 1
+ result = YamlFrontMatter.new
+ result.yaml_lines = gfm_lines[startl..(endl - 1)]
+ result.startl = startl
+ result.endl = endl
+ return result
+ end
+ end
+ end
+ if counter == 1
+ result = YamlFrontMatter.new
+ result.yaml_lines = gfm_lines[startl..-1]
+ result.startl = startl
+ result.endl = gfm_lines.length
+ return result
+ end
+ # case counter == 0:
+ result = YamlFrontMatter.new
+ result.yaml_lines = []
+ result.startl = 0
+ result.endl = 0
+ return result
+end
+
+
+
+def line_only_made_of(line, char)
+ length = line.length
+ for i in 0..(length - 2)
+ if line[i] != char
+ return false
+ end
+ end
+ return line[length - 1] == "\n"
+end
+
+
+
+def render(gfm_lines)
+ Kramdown::Document.new(gfm_lines.join).to_html.lines
+end
+
+
+
+LineColumn = Struct.new(:l, :c)
+
+def look_for_headline(rendered_html_lines, headline_id)
+ for l in 0..(rendered_html_lines.length - 1)
+    m = rendered_html_lines[l].scan(/<h. id="#{headline_id}">/)
+ if m.length > 0
+ c = rendered_html_lines[l].index(m[0])
+ result = LineColumn.new
+ result.l = l
+ result.c = c
+ return result
+ end
+ end
+ return nil
+end
+
+
+
+def extract_headline_id(rendered_html_lines, l, c)
+ line = rendered_html_lines[l]
+ line = line[c..-1]
+  m = line.scan(/<h. id="/)
+  if m.length == 0
+    return nil
+  end
+  line = line[m[0].length..-1]
+  endi = line.index('"')
+  return line[0..(endi - 1)]
+end
+
+
+
+def try_get_headline_column_and_line(gfm_lines, line_number, placeholder)
+  # save headline
+  saved_headline = gfm_lines[line_number]
+
+  this_line = gfm_lines[line_number]
+  next_line = ''
+  if line_number < gfm_lines.length - 1
+    next_line = gfm_lines[line_number + 1]
+  end
+
+  hl = nil
+
+  if this_line.start_with?('#')
+    # headline starting with '#'
+    gfm_lines[line_number] = '# ' + placeholder + "\n"
+    hl = look_for_headline(render(gfm_lines), placeholder)
+  elsif next_line.length >= 3 and (line_only_made_of(next_line, '=') or line_only_made_of(next_line, '-'))
+ # headline starting with '===' or '---'
+ gfm_lines[line_number] = placeholder + "\n"
+ hl = look_for_headline(render(gfm_lines), placeholder)
+ end
+
+ # revert headline
+ gfm_lines[line_number] = saved_headline
+
+ return hl
+end
+
+
+
+def generate_unique_placeholder(rendered_html_lines)
+ number = 0
+ prefix = 'xq'
+ suffix = 'z'
+ result = ''
+ while true do
+ result = prefix + number.to_s + suffix
+ solution_found = true
+ for line in rendered_html_lines
+ if line.include? result
+ number += 1
+ solution_found = false
+ break
+ end
+ end
+ if solution_found
+ break
+ end
+ end
+ # we assume that there will be at least one solution
+ return result
+end
+
+
+
+def create_id_list(gfm_lines)
+ result = []
+ gfm_lines2 = gfm_lines[0..-1]
+ rendered_html_lines = render(gfm_lines)
+
+ placeholder = generate_unique_placeholder(rendered_html_lines)
+
+ # line-by-line: assume a headline
+ n = gfm_lines2.length
+ for line_number in 0..(n - 1)
+ hl = try_get_headline_column_and_line(gfm_lines2, line_number, placeholder)
+ if hl != nil
+ hid = extract_headline_id(rendered_html_lines, hl.l, hl.c)
+ result = result + [hid]
+ end
+ end
+ return result
+end
+
+
+
+def is_a_headline(gfm_lines, line_number, placeholder)
+ return try_get_headline_column_and_line(gfm_lines, line_number, placeholder) != nil
+end
+
+
+
+def insert_ids_into_gfm_file(id_list, gfm_lines)
+ result = gfm_lines[0..-1]
+ if id_list.length == 0
+ return result
+ end
+ n = result.length
+ rendered_html_lines = render(gfm_lines)
+ placeholder = generate_unique_placeholder(rendered_html_lines)
+ id_index = 0
+
+ for line_number in 0..(gfm_lines.length - 1)
+ if is_a_headline(gfm_lines, line_number, placeholder)
+ id = id_list[id_index]
+ if id != nil
+        str_to_insert = '<a id="' + id + '"></a>' + "\n"
+ line = result[line_number]
+ if !line.nil? and line.start_with?('#')
+ if line_number + 1 >= n
+ result = result + ['']
+ end
+ result[line_number + 1] = str_to_insert.to_s + result[line_number + 1].to_s
+ else
+ if line_number + 2 >= n
+ result = result + ['']
+ end
+ result[line_number + 2] = str_to_insert.to_s + result[line_number + 2].to_s
+ end
+ end
+ id_index += 1
+ if id_index >= id_list.length
+ break
+ end
+ end
+ end
+ return result
+end
+
+
+
+def merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines)
+ # assuming that both files match line by line such that matching headlines are in the same lines
+
+ # get yaml front matter from orig
+ orig_yfm = get_yaml_front_matter(orig_gfm_lines)
+ orig_yaml_front_matter = orig_yfm.yaml_lines
+ orig_start = orig_yfm.startl
+ orig_end = orig_yfm.endl
+
+ # get yaml front matter from trl
+ trl_yfm = get_yaml_front_matter(trl_gfm_lines)
+ trl_yaml_front_matter = trl_yfm.yaml_lines
+ trl_start = trl_yfm.startl
+ trl_end = trl_yfm.endl
+
+ # get body from trl
+ trl_body = trl_gfm_lines[trl_end..-1]
+
+ # get body from orig
+ orig_body = orig_gfm_lines[orig_end..-1]
+
+ # create id list
+ orig_id_list = create_id_list(orig_body)
+
+ # insert ids
+ preresult = insert_ids_into_gfm_file(orig_id_list, trl_body)
+
+ # create translated document with adapted body
+ result_trl_gfm = trl_yaml_front_matter.join + preresult.join
+
+ return result_trl_gfm
+end
+
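+# example: for a mapping file containing the two lines (paths are illustrative)
+#   file_filter = _translated/<lang>/doc/example.md
+#   source_file = doc/example.md
+# and lang = 'de', the result is {"./doc/example.md" => "./_translated/de/doc/example.md"}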
+def create_dict_from_tx_config(lang, mappingfile)
+  # read a tx config file containing only file_filter and source_file information, store it in a dict and give it back
+  # mappingfile: a tx config file containing only file_filter and source_file information
+ # return: a dict containing a mapping between an original file and its downloaded tx translation
+ mapping = {}
+
+ lines = []
+ lines = read_file(mappingfile)
+
+ translated = []
+ source = []
+ n = lines.length
+ idx = 0
+  while idx + 1 < n do
+    t = lines[idx].split('file_filter =')[1].strip
+    s = lines[idx + 1].split('source_file =')[1].strip
+    translated += ["./" + t.gsub("<lang>", lang)]
+    source += ["./" + s]
+    idx += 2
+  end
+
+ n = translated.length
+ idx = 0
+ while idx < n do
+ mapping[source[idx]] = translated[idx]
+ idx += 1
+ end
+
+ return mapping
+end
+
+def read_file(filename)
+ read_lines = []
+ File.open(filename, "r") do |f|
+ f.each_line do |line|
+ read_lines += [line]
+ end
+ end
+ return read_lines
+end
+
+def write_file(contents, filename)
+ File.open(filename, "w") do |f|
+ f.write(contents)
+ end
+end
+
+def main()
+ if ARGV.length != 2
+ exit(1)
+ end
+
+ mapping = create_dict_from_tx_config(ARGV[0], ARGV[1])
+ mapping.each do |key, value|
+ if !key.end_with?(".yml")
+ orig_gfm_lines = read_file(key)
+ trl_gfm_lines = read_file(value)
+ # merge ids in gfm files
+ result = merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines)
+ write_file(result, value)
+ end
+ end
+
+end
+
+
+if __FILE__ == $0
+ main()
+
+ # --- for debugging
+ # orig_gfm_lines = read_file(ARGV[0])
+ # trl_gfm_lines = read_file(ARGV[1])
+ # result = merge_ids_in_gfm_files(orig_gfm_lines, trl_gfm_lines)
+ # write_file(result, '/dev/stdout')
+end
+
diff --git a/_utils/_translation_utils/post_transifex_pull.sh b/_utils/_translation_utils/post_transifex_pull.sh
new file mode 100644
index 00000000..9e6f6f56
--- /dev/null
+++ b/_utils/_translation_utils/post_transifex_pull.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# to be run from the git root
+# $1 is lang
+# $2 is the directory where the translated files reside and where the language needs to be added to internal urls
+# TODO param check
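+# example invocation (assuming German translations were pulled into _translated/de/):
+#   bash _utils/_translation_utils/post_transifex_pull.sh de _translated/de/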
+
+set -e
+
+echo "============================ post processing step 1 ======================================"
+#read b
+bash _utils/_translation_utils/prepare_tx_config_postprocess.sh .tx/config /tmp/tx-mapping
+
+echo "============================ remove obsolete files ======================================="
+python3 _utils/_translation_utils/remove_obsolete_files.py "$1" "$2" /tmp/tx-mapping
+
+echo "============================ post processing step 2 ======================================"
+#read b
+ruby _utils/_translation_utils/merge_md_heading_ids.rb "$1" /tmp/tx-mapping
+
+echo "============================ post processing step 3 press to cont ======================================"
+#read b
+python3 _utils/_translation_utils/postprocess_translation.py "$1" "$2" /tmp/tx-mapping /tmp/translated_href_urls.txt --yml
+
+
+echo "============================ post processing step 4 press to cont ======================================"
+#read b
+bash _utils/_translation_utils/postprocess_translation.sh "$1" "$2" /tmp/translated_href_urls.txt
diff --git a/_utils/_translation_utils/postprocess_htmlproofer.py b/_utils/_translation_utils/postprocess_htmlproofer.py
new file mode 100644
index 00000000..e73d98d9
--- /dev/null
+++ b/_utils/_translation_utils/postprocess_htmlproofer.py
@@ -0,0 +1,306 @@
+#!/usr/bin/python3
+'''
+postprocess the htmlproofer output for the translated files
+invoke: python _utils/_translation_utils/postprocess_htmlproofer.py /tmp/html.output _translated/de/
+[/tmp/html.output]: output from htmlproofer
+[_translated/de/]: the directory with the downloaded translated files from transifex
+'''
+from frontmatter import Post, load, dump
+import yaml
+from io import open as iopen
+from re import search
+from sys import exit
+import sys
+from os import linesep, walk, environ
+from argparse import ArgumentParser
+from os.path import isfile, isdir
+from json import loads, dumps
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+
+SLASH = '/'
+# markdown frontmatter keys
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+TRANSLATED_LANGS = ['de']
+if 'TRANSLATED_LANGS' in environ:
+ TRANSLATED_LANGS = environ['TRANSLATED_LANGS'].split()
+URL_KEY = 'url'
+
+
+basicConfig(level=DEBUG)
+logger = getLogger(__name__)
+LOG_FILE_NAME='/tmp/postprocess_htmlproofer.log'
+
+def configure_logging(logname):
+ handler = FileHandler(logname)
+ handler.setLevel(DEBUG)
+ formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ handler.setFormatter(formatter)
+ logger.addHandler(handler)
+
+def log_debug(name, data):
+ logger.debug('############################################')
+ logger.debug('############################################')
+ logger.debug('###\t'+ name.capitalize() + '\t###')
+ logger.debug('--------------------------------------------')
+ if isinstance(data,dict):
+ logger.debug(dumps(data, indent=4))
+ else:
+ logger.debug(data)
+ logger.debug('############################################')
+ logger.debug('############################################')
+
+def get_new_line(line, internal_link, internal_links, permalink):
+ if internal_link in internal_links and internal_link.startswith("/"):
+ # TODO redundant
+ indd = internal_link.find('#')
+ internal_link_to_replace = internal_link[0:indd]
+ to_replace = line.replace(internal_link, internal_link_to_replace)
+ return to_replace
+ elif internal_link in internal_links and internal_link.startswith("#"):
+ to_replace = line.replace(internal_link, permalink)
+ return to_replace
+ else:
+ return line
+ #return None
+
+
+def process_markdown(translated_file, internal_links):
+ """
+ for every translated file discard the erroneous internal links
+    translated_file: file marked and uploaded to transifex for translation; if it was not downloaded, a debug message is logged
+    internal_links: all internal links belonging to the translated_file that are erroneous according to htmlproofer
+ """
+ mdt = Post
+ try:
+ with iopen(translated_file) as t:
+ mdt = load(t)
+ lines = []
+ permalink = mdt.get(PERMALINK_KEY)
+ if permalink == None:
+ permalink = '/'
+ for line in mdt.content.splitlines():
+ # gather information
+ inst = {}
+ if line.startswith("[") and "]:" in line:
+ s = line.find(":")
+ internal_link = line[s+1:len(line)].strip()
+ if internal_link in internal_links and internal_link.startswith("/"):
+ ind = line.rfind('#')
+ to_replace = line[0:ind]
+ lines.append(to_replace)
+ continue
+ if internal_link in internal_links and internal_link.startswith("#"):
+ to_replace = line.replace(internal_link, permalink)
+ lines.append(to_replace)
+ continue
+
+ if "[" in line and "](" in line and ")" in line:
+ count = line.count('](')
+ val = 0
+ for i in range(0, count):
+ s = line.find("](", val)
+ e = line.find(")", s + 1)
+ internal_link = line[s+2:e].strip().replace(')','')
+
+ line = get_new_line(line, internal_link, internal_links, permalink)
+ val = val + s + 1
+ lines.append(line)
+ continue
+ lines.append(line)
+
+ mdt.content = linesep.join(lines) + '\n'
+
+ with iopen(translated_file, 'wb') as replaced:
+ dump(mdt, replaced)
+
+ except FileNotFoundError as e:
+ logger.debug('Following file was not updated/downloaded from transifex: %s' % e.filename)
+
+
+
+def get_all_translated_permalinks_and_redirects_to_file_mapping(translated_dir):
+ """
+ traverse the already updated (via tx pull) root directory with all the translated files
+ and get their permalinks and redirects
+ translated_dir: root directory with all the translated files
+ return: set holding the translated permalinks and redirects
+ """
+ mapping = {}
+ perms = []
+ yml_files = []
+ for dirname, subdirlist, filelist in walk(translated_dir):
+ if dirname[0] == '.':
+ continue
+ for filename in filelist:
+ if filename[0] == '.':
+ continue
+ filepath = dirname + SLASH + filename
+ md = Post
+ with iopen(filepath) as fp:
+ md = load(fp)
+ if md.get(PERMALINK_KEY) != None:
+ perms.append(md.get(PERMALINK_KEY))
+ elif filepath.endswith('.yml'):
+ yml_files.append(filepath)
+ else:
+ logger.error('no permalink in frontmatter for file %s' % filename)
+ redirects = md.get(REDIRECT_KEY)
+ if redirects != None:
+ if isinstance(redirects,list):
+ for r in redirects:
+ perms.append(r)
+ elif isinstance(redirects,str):
+ perms.append(redirects)
+ else:
+                        logger.error('ERROR: unexpected type in redirect_from: %s' % redirects)
+ exit(1)
+ else:
+ logger.debug('no redirect_from in frontmatter for file %s' % filepath)
+ mapping[filepath] = perms
+ perms = []
+ return mapping, yml_files
+
+
+# TODO simplify
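+# the parsing below assumes htmlproofer's debug output groups errors per file, roughly like:
+#   - ./_site/de/doc/index.html
+#     *  internally linking to /doc/foo/#bar, which does not exist
+#        <a href="/doc/foo/#bar">foo</a>
+# (the exact line shapes are an assumption inferred from the string matching below)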
+def get_error_output_from_htmlproofer(htmlproofer_output):
+ errors_tmp = []
+ with iopen(htmlproofer_output,'r') as h:
+ lines = h.readlines()
+ errors_tmp = [x for x in lines if not(x.startswith('Checking') or x.startswith('Ran') or x.startswith('Running') or x.startswith('\n') or x.startswith('htmlproofer'))]
+
+ count = 0
+ errors = {}
+ internal_link = []
+ u = ''
+ pattern = 'a href='
+ for i in range(len(errors_tmp)):
+ if pattern in errors_tmp[i]:
+ i1 = errors_tmp[i].find(pattern, 0)
+ i2 = errors_tmp[i].find('"', i1 + len(pattern))
+ i3 = errors_tmp[i].find('"', i2 +1 )
+
+ i_l = errors_tmp[i][i2+1:i3]
+ if '">' in i_l:
+ i_l = search('(.*)">', i_l).group(1)
+ internal_link.append(i_l)
+ count += 1
+ if './_site' in errors_tmp[i]:
+ if count > 0:
+ errors[u] = internal_link
+ internal_link = []
+ u = search('./_site(.*)index.html',errors_tmp[i]).group(1)
+ count = 0
+ errors[u] = internal_link
+ return errors
+
+def replace_url(to_replace, errorlinks):
+ """
+    recursively remove the heading fragment from urls in a yaml file.
+    to_replace: the translated yaml content as a dictionary
+    errorlinks: all internal links that are dead ends and whose heading fragment (the part after '#') needs to be cut off
+ """
+ if not isinstance(to_replace,dict):
+ return
+ for (k_r, v_r) in to_replace.items():
+ if isinstance(v_r, list):
+ for i in v_r:
+ replace_url(i, errorlinks)
+ elif URL_KEY == k_r:
+ val = to_replace[k_r]
+ if val is not None and '#' in val:
+ tmp_val = val[0:val.find('#')]
+ to_replace[URL_KEY]= tmp_val if (val in errorlinks) else val
+
+def process_yml(translated, errorlinks):
+ """
+    for every translated yml file cut off the heading fragment from internal urls that htmlproofer reported as erroneous
+    translated: translated yml file
+    errorlinks: all internal links that are dead ends and whose heading fragment (the part after '#') needs to be cut off
+ """
+ docs = []
+ try:
+ with iopen(translated) as tp:
+ docs = yaml.safe_load(tp)
+ if docs == None:
+ logger.error("Empty translated file %s" %translated)
+ exit(1)
+ for a in docs:
+ replace_url(a, errorlinks)
+ except FileNotFoundError as e:
+ logger.debug('Following file was NOT updated/downloaded from transifex: %s' % e.filename)
+
+ try:
+ if len(docs)>0:
+ with iopen(translated, 'w') as replace:
+ yaml.dump(docs, replace, sort_keys=False)
+ except FileNotFoundError as e:
+ logger.debug('do nothing for file: %s. it is OK.' % e.filename)
+
+if __name__ == '__main__':
+    # python _utils/_translation_utils/postprocess_htmlproofer.py /tmp/html.output _translated/de/
+ parser = ArgumentParser()
+ # the file containing the output of htmlproofer
+ parser.add_argument("htmlproofer_output")
+ # the directory containing the translated (downloaded via tx pull) files
+ parser.add_argument("translated_dir")
+ args = parser.parse_args()
+
+ configure_logging(LOG_FILE_NAME)
+
+
+ if not isdir(args.translated_dir):
+ print("please check your translated directory")
+ logger.error("please check your translated directory")
+ exit(1)
+
+ if not isfile(args.htmlproofer_output):
+ print("please check your html proofer output file")
+ logger.error("please check your html proofer output file")
+ sys.exit(1)
+
+ errors = get_error_output_from_htmlproofer(args.htmlproofer_output)
+
+ if not errors:
+ print("nothing to do, no errors to postprocess")
+ sys.exit(1)
+
+ logger.debug("------------------------------------------------")
+ logger.debug("------------------------------------------------")
+ logger.debug("------------------------------------------------")
+ logger.debug("-------------STRINGS TAGGED NOTRANSLATE---------")
+ logger.debug("------------------------------------------------")
+ logger.debug("------------------------------------------------")
+
+
+ error_links = list(sorted({el for val in errors.values() for el in val}))
+ log_debug("HTML ERRORS", errors)
+ log_debug("HTML ERRORS", error_links)
+ logger.debug("------------------------------------------------")
+ logger.debug("------------------------------------------------")
+ logger.debug("------------------------------------------------")
+
+ mapping, yml_files = get_all_translated_permalinks_and_redirects_to_file_mapping(args.translated_dir)
+
+
+ log_debug('mapping ', mapping)
+ log_debug('yml files ', yml_files)
+
+ file_to_internal_links = {}
+ for key, item in mapping.items():
+ for k, i in errors.items():
+ if k in item:
+ file_to_internal_links[key] = i
+
+ log_debug(" file to internal links mapping", file_to_internal_links)
+ for key, item in file_to_internal_links.items():
+ process_markdown(key, item)
+
+ # traverse all yml data files and cut the translated urls if they are in error_urls
+ for yml in yml_files:
+ process_yml(yml, error_links)
+
diff --git a/_utils/_translation_utils/postprocess_translation.py b/_utils/_translation_utils/postprocess_translation.py
new file mode 100644
index 00000000..db12a6ba
--- /dev/null
+++ b/_utils/_translation_utils/postprocess_translation.py
@@ -0,0 +1,450 @@
+#!/usr/bin/python3
+# adds the language pattern to the permalink line and to all found relative links in the currently open file, recursively from a given root dir
+# invoke like: python _utils/_translation_utils/postprocess_translation.py de _translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml
+#param1 is the language in short form
+#param2 is the root translated dir
+#param3 is the current transifex mapping between original and translated files (the output of the script prepare_tx_config_postprocess.sh) in the format:
+# file_filter = <path of the translated file, containing the <lang> placeholder>
+# source_file = <path of the original source file>
+#param4 is the name for the file that will contain all the permalinks of files translated/downloaded via the tx client; it is afterwards used by the postprocess_translation.sh script
+#param5 is optional and indicates that .yml files (as in the _data directory, with no frontmatter whatsoever) are to be processed as well
+
+from yaml import safe_load
+from yaml import dump as ydump
+import frontmatter
+from io import open as iopen
+from os.path import isfile, isdir
+from os import linesep, walk, environ
+from re import findall
+from sys import exit
+from argparse import ArgumentParser
+from json import loads, dumps
+from collections import deque
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+patterns = (
+ "](/",
+ "]: /",
+ "href=\"/",
+ "url: /",
+ "href=\'/",
+)
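+# each pattern marks the start of an internal url: markdown inline links '](/',
+# markdown reference links ']: /', html href attributes, and yaml 'url: /' entries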
+# TODO simplify the if condition with a list of omitted url patterns
+news = "/news/"
+qubes_issues = "/qubes-issues/"
+# constants and such
+# yml keys:
+YML_KEYS = ['url', 'topic', 'title', 'category', 'folder', 'htmlsection', 'tweet', 'avatar', 'img',
+ 'article', 'quote', 'name', 'occupation', 'author', 'more', 'text',
+ 'video', 'intro', 'version', 'subtitle', 'download', 'security', 'bug', 'help',
+ 'join', 'partner', 'cert', 'picture', 'email', 'website', 'mail', 'links', 'id',
+ 'paragraph', 'snippet', 'column', 'hover', 'digest', 'signature', 'pgp', 'green', 'red', 'blue', 'trump',
+ 'tts1', 'tts2', 'txp', 'txaq', 'pxaq', 'column1', 'column2', 'column3', 'yes_short', 'no_short', 'no_extended', 'tba',
+ 'bold', 'item', 'note', 'section', 'row', 'r_version',
+ 'go', 'search', 'metatopic', 'ddg', 'hover']
+URL_KEY = 'url'
+# md frontmatterkeys:
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+REDIRECT_TO = 'redirect_to'
+LANG_KEY = 'lang'
+TRANSLATED_KEY = 'translated'
+LAYOUT_KEY = 'layout'
+SLASH = '/'
+MD_URL_SPLIT_PATTERNS = ['/)','/#']
+TRANSLATED_LANGS = ['de']
+if 'TRANSLATED_LANGS' in environ:
+ TRANSLATED_LANGS = environ['TRANSLATED_LANGS'].split()
+#EXCLUDE_FILES = ['download.md' ]
+
+
+basicConfig(level=DEBUG)
+logger = getLogger(__name__)
+LOG_FILENAME='/tmp/postprocess_translation.log'
+
+def configure_logging(logname):
+ handler = FileHandler(logname)
+ handler.setLevel(DEBUG)
+ formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ handler.setFormatter(formatter)
+ logger.addHandler(handler)
+
+def log_debug(name, data):
+ logger.debug('############################################')
+ logger.debug('############################################')
+ logger.debug('###\t'+ name.capitalize() + '\t###')
+ logger.debug('--------------------------------------------')
+ if isinstance(data,dict):
+ logger.debug(dumps(data, indent=4))
+ else:
+ logger.debug(data)
+ logger.debug('############################################')
+ logger.debug('############################################')
+
+
+def write_to_file(filename, lines):
+ """
+ write the given data structure to a file
+ filename: the name of the file to be written to
+ lines: the content
+ """
+ with iopen(filename,'w') as c:
+ c.write('\n'.join(str(line) for line in lines))
+ c.truncate()
+
+def process_markdown(source_file, translated_file, permalinks, lang):
+ """
+ for every uploaded via tx client markdown file for translation, replace the markdown frontmatter with the frontmatter of the original file,
+ set the specific language, set translated to yes and for all downloaded/updated via transifex files, respectively permalinks,
+ add the specific language to the internal url
+ source_file: original file
+    translated_file: file marked and uploaded to transifex for translation; if it was not downloaded, a debug message is logged
+    permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
+ lang: the translation language
+ """
+ mdt = frontmatter.Post
+ try:
+ with iopen(source_file) as s, iopen(translated_file) as t:
+ mds = frontmatter.load(s)
+ mdt = frontmatter.load(t)
+ if mds.get(PERMALINK_KEY) != None:
+ mdt[PERMALINK_KEY] = SLASH + lang + mds.get(PERMALINK_KEY)
+ elif PERMALINK_KEY in mdt:
+ # if missing in source, remove from translated too
+ del mdt[PERMALINK_KEY]
+
+ if mds.get(REDIRECT_KEY) != None:
+ redirects = mds.get(REDIRECT_KEY)
+ if isinstance(redirects, str):
+ redirects = [redirects]
+ # just in case
+ if any('..' in elem for elem in redirects):
+ logger.error('\'..\' found in redirect_from in file %s' % source_file)
+ exit(1)
+ mdt[REDIRECT_KEY] = [(SLASH + lang + elem.replace('/en/', SLASH) if not elem.startswith(SLASH + lang + SLASH) else elem)
+ for elem in redirects]
+
+ if mds.get(PERMALINK_KEY) != None and mds[PERMALINK_KEY] in mdt[REDIRECT_KEY]:
+ mdt[REDIRECT_KEY].remove(mds[PERMALINK_KEY])
+ if mdt.get(PERMALINK_KEY) != None and mdt[PERMALINK_KEY] in mdt[REDIRECT_KEY]:
+ mdt[REDIRECT_KEY].remove(mdt[PERMALINK_KEY])
+
+ tmp = sorted(set(mdt[REDIRECT_KEY]))
+ mdt[REDIRECT_KEY] = tmp
+ elif REDIRECT_KEY in mdt:
+ # if missing in source, remove from translated too
+ del mdt[REDIRECT_KEY]
+
+ if mds.get(LAYOUT_KEY) != None:
+ mdt[LAYOUT_KEY] = mds[LAYOUT_KEY]
+
+ if mds.get(REDIRECT_TO) != None:
+ redirect = mds.get(REDIRECT_TO)
+ if isinstance(redirect, list):
+ redirect = redirect[0]
+ if redirect.startswith('/') and not redirect.startswith(SLASH + lang + SLASH) and not redirect.startswith(news):
+ mdt[REDIRECT_TO] = SLASH + lang + redirect
+ else:
+ mdt[REDIRECT_TO] = redirect
+ elif REDIRECT_TO in mdt:
+ del mdt[REDIRECT_TO]
+
+ mdt[LANG_KEY] = lang
+ # TODO we do not need the translated key anymore
+ #mdt[TRANSLATED_KEY] = 'yes'
+ ## for testing purposes only
+ #if mdt.get('title') != None:
+ # mdt['title'] = lang.upper() +"!: " + mdt.get('title')
+
+ # replace links
+ lines = []
+ for line in mdt.content.splitlines():
+ for pattern in patterns:
+ if pattern in line:
+ tmp = line.split(pattern)
+ line = tmp[0]
+ for part in range(1, len(tmp)):
+ if '../' in tmp[part]:
+ logger.error('\'..\' found in internal url: %s' % tmp[part])
+ exit(1)
+
+ # TODO we can translate news you know
+ if not tmp[part].startswith(lang + SLASH) and \
+ not tmp[part].startswith('news') and \
+ not tmp[part].startswith('attachment') and \
+ not tmp[part].startswith('qubes-issues') and \
+ split_and_check(tmp[part],permalinks):
+ line += pattern + lang + SLASH + tmp[part]
+ # TODO this is the case with links at the bottom of the file
+ elif not tmp[part].startswith(SLASH) and \
+ SLASH + tmp[part] in permalinks:
+ line += pattern + lang + SLASH + tmp[part]
+ # TODO if a url contains a language but the url belongs to a file that is not translated should i actually remove the language -> overengineering?
+# elif tmp[part].startswith(lang+SLASH) and not split_and_check(tmp[part][len(lang)+1],permalinks):
+ # line += pattern + tmp[part][len(lang)+1]
+ else:
+ line += pattern + tmp[part]
+ lines.append(line)
+
+ mdt.content = linesep.join(lines) + '\n'
+
+ with iopen(translated_file, 'wb') as replaced:
+ frontmatter.dump(mdt, replaced)
+
+ except FileNotFoundError as e:
+ logger.debug('Following file was not updated/downloaded from transifex: %s' % e.filename)
+
+
+
+def split_and_check(md_line, permalinks):
+ """
+    for a given portion of a markdown line containing an internal link,
+    return whether the internal link belongs to a file already translated and downloaded from transifex
+    md_line: portion of a markdown line containing an internal link
+    permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
+ """
+ for pattern in MD_URL_SPLIT_PATTERNS:
+ if pattern in md_line:
+ sp = md_line.split(pattern)
+ t = sp[0]
+ t = SLASH + t if not t.startswith(SLASH) else t
+ t = t + SLASH if not t.endswith(SLASH) else t
+ if t in permalinks:
+ return True
+ else:
+ logger.debug("Following link: %s belongs to a file NOT translated/downloaded from transifex" %t)
+ return False
+
+def check_yml_attributes(to_replace, original):
+ """
+ recursively check if the title, folder and category attributes of the translated yaml file
+ are not empty strings
+ if they are: replace them with the original content
+ it assumes that the order between original and translated files loaded as dictionary is preserved
+ to_replace: the translated yaml content as a dictionary
+ original: the original yaml content as a dictionary
+ """
+
+ if not (isinstance(to_replace,dict) and isinstance(original,dict)):
+ return
+ for (k_r, v_r), (k_o, v_o) in zip(to_replace.items(), original.items()):
+ if isinstance(v_r, list) and isinstance(v_o, list):
+ for i, j in zip(v_r, v_o):
+ check_yml_attributes(i, j)
+ for yml_key in YML_KEYS:
+ if yml_key == k_r and yml_key == k_o and to_replace[yml_key] == '':
+ to_replace[yml_key] = original[yml_key]
+ elif k_r != k_o:
+ logger.error("ERROR, ordered of the loaded yml file is not preserved %s" % k_r +':' + k_o)
+ exit(1)
+
+
+def replace_url(to_replace, original, lang, permalinks):
+ """
+ recursively add language to the original value of the key URL if the file with the given URL is translated and save it to the translated yaml file.
+ if the file is not translated keep the original url
+ it assumes that the order between original and translated files loaded as dictionary is preserved
+ to_replace: the translated yaml content as a dictionary
+    original: the original yaml content as a dictionary
+ lang: language, for example de
+ permalinks: urls of the translated/downloaded files from transifex
+ """
+ if not (isinstance(to_replace,dict) and isinstance(original,dict)):
+ return
+ for (k_r, v_r), (k_o, v_o) in zip(to_replace.items(), original.items()):
+ if isinstance(v_r, list) and isinstance(v_o, list):
+ for i, j in zip(v_r, v_o):
+ replace_url(i, j, lang, permalinks)
+ elif URL_KEY == k_r and URL_KEY == k_o:
+ val = original[k_r]
+ if val is not None and '#' in val:
+ tmp_val = val[0:val.find('#')]
+ to_replace[URL_KEY]= SLASH + lang + val if (tmp_val in permalinks) else val
+ else:
+ to_replace[URL_KEY]= SLASH + lang + val if (val in permalinks) else val
+ elif k_r != k_o:
+ logger.error("ERROR, ordered of the loaded yml file is not preserved %s" % k_r +':' + k_o)
+ exit(1)
+
+
+
+def process_yml(source, translated, lang, permalinks):
+ """
+ for every given source-translated yml file pair add the language to the urls if they belong to already translated files,
+ if not retain the original ones
+ source: original yml file
+ translated: translated yml file
+ lang: language, for example de
+ permalinks: all internal links (permalink and redirect_from) belonging to the files downloaded from transifex
+ """
+ docs = []
+ try:
+ with iopen(source) as fp, iopen(translated) as tp:
+ docs_original = safe_load(fp)
+ docs = safe_load(tp)
+ if docs == None:
+ logger.error("Empty translated file %s" %translated)
+ exit(1)
+ for a, b in zip(docs, docs_original):
+ replace_url(a, b, lang, permalinks)
+ check_yml_attributes(a, b)
+ except FileNotFoundError as e:
+ logger.debug('Following file was NOT updated/downloaded from transifex: %s' % e.filename)
+
+ try:
+ if len(docs)>0:
+ with iopen(translated, 'w') as replace:
+ ydump(docs, replace, sort_keys=False)
+ except FileNotFoundError as e:
+ logger.debug('do nothing for file: %s. it is OK.' % e.filename)
+
+
+def get_all_the_hrefs(translated_dir):
+ """
+ traverse the already updated (via tx pull) root directory with all the translated files for a specific language
+    and get all the internal urls that are embedded in html code in an href attribute
+    translated_dir: root directory with all the translated files for a specific language
+    return: set holding all the internal urls that are embedded in html code in an href attribute
+ """
+
+ href = set()
+ reg ='(?<=href=\").*?(?=\")'
+ for dirname, subdirlist, filelist in walk(translated_dir):
+ if dirname[0] == '.':
+ continue
+ for filename in filelist:
+ if filename[0] == '.':
+ continue
+ filepath = dirname + SLASH + filename
+ try:
+ with iopen(filepath) as fp:
+ lines = fp.readlines()
+ for line in lines:
+ t = findall(reg, line)
+ if len(t)>0:
+ for i in t:
+ href.add(i)
+ except FileNotFoundError as e:
+ logger.error('problem opening a file in the translated dir: %s' %e.filename)
+ exit(1)
+ return href
+
+def get_all_translated_permalinks_and_redirects(translated_dir,lang):
+ """
+ traverse the already updated (via tx pull) root directory with all the translated files for a specific language
+ and get their permalinks and redirects without the specific language
+ translated_dir: root directory with all the translated files for a specific language
+ lang: the specific language
+ return: set holding the original (language code is removed) permalinks and redirects
+ """
+
+ perms = set()
+ for dirname, subdirlist, filelist in walk(translated_dir):
+ if dirname[0] == '.':
+ continue
+ for filename in filelist:
+ if filename[0] == '.':
+ continue
+ filepath = dirname + SLASH + filename
+ md = frontmatter.Post
+ with iopen(filepath) as fp:
+ md = frontmatter.load(fp)
+ if md.get(PERMALINK_KEY) != None:
+ perms.add(md.get(PERMALINK_KEY)[len(lang)+1:] if md.get(PERMALINK_KEY).startswith(SLASH+lang +SLASH) else md.get(PERMALINK_KEY))
+ else:
+ logger.error('no permalink in frontmatter for file %s' % filename)
+ redirects = md.get(REDIRECT_KEY)
+ if redirects != None:
+ if isinstance(redirects,list):
+ for r in redirects:
+ perms.add(r[len(lang)+1:] if r.startswith(SLASH + lang + SLASH) else r)
+ elif isinstance(redirects,str):
+ perms.add(redirects)
+ else:
+                    logger.error('ERROR: unexpected type in redirect_from: %s' % redirects)
+ exit(1)
+ else:
+ logger.debug('no redirect_from in frontmatter for file %s' % filepath)
+ return perms
+
+def create_dict_from_tx_config(mappingfile, lang):
+ """
+    read a tx config file containing only file_filter and source_file information, store it in a dict and give it back
+    mappingfile: a tx config file containing only file_filter and source_file information
+ return: a dict containing a mapping between an original file and its downloaded tx translation
+ """
+ mapping = {}
+ with iopen(mappingfile) as fp:
+ lines = fp.readlines()
+        translated = ['./' + x.split('file_filter =')[1].strip().replace('<lang>', lang) for i, x in enumerate(lines) if i % 2 == 0]
+        source = ['./' + x.split('source_file =')[1].strip() for i, x in enumerate(lines) if i % 2 == 1]
+
+    for src, trl in zip(source, translated):
+        mapping[src] = trl
+ return mapping
+
+
+def main(translated_dir, lang, yml, mapping, href_filename):
+ perms = get_all_translated_permalinks_and_redirects(translated_dir, lang)
+ log_debug('all translated permalinks/redirects', perms)
+
+    hrefs = get_all_the_hrefs(translated_dir)
+
+ log_debug('all the hrefs', hrefs)
+ write_to_file(href_filename, perms.intersection(hrefs))
+
+ # for each pair of source and translated file postprocess the translated file
+ for key, item in mapping.items():
+ if yml and item.endswith('.yml'):
+ process_yml(key, item, lang, perms)
+ #if not item.endswith('.yml') and not item.endswith('downloads.md'):
+ if not item.endswith('.yml'):
+ process_markdown(key, item, perms, lang)
+
+
+
+if __name__ == '__main__':
+
+    # python _utils/_translation_utils/postprocess_translation.py de _translated/de/ _utils/tx-mapping _utils/translated_hrefs_urls.txt --yml
+ parser = ArgumentParser()
+ # for which language should we do this
+ parser.add_argument("language")
+ # the directory containing the translated (downloaded via tx pull) files
+ parser.add_argument("translateddir")
+ # provide the mappingfile from tx configuration containing the file_filter to source_file mapping
+ parser.add_argument("tx_mappingfile")
+    # name of the file to contain/write to all the internal urls that are embedded in html code in an href attribute
+ # for later processing postprocess_translation.sh
+ parser.add_argument("translated_hrefs_filename")
+ # whether or not to process yml files
+ parser.add_argument("--yml", action='store_true')
+ args = parser.parse_args()
+
+
+ if not isfile(args.tx_mappingfile):
+ print("please check your transifex mapping file")
+ exit(1)
+
+ if not isdir(args.translateddir):
+ print("please check your translated directory")
+ exit(1)
+
+    if args.language not in TRANSLATED_LANGS:
+ print("language not in the expected translation languages")
+ exit(1)
+
+ configure_logging(LOG_FILENAME)
+
+
+ log_debug('START', {})
+
+ source_translation_mapping = create_dict_from_tx_config(args.tx_mappingfile, args.language)
+
+
+ log_debug('source/translation file mapping', source_translation_mapping)
+
+ main(args.translateddir, args.language, args.yml, source_translation_mapping, args.translated_hrefs_filename)
+
+
+
diff --git a/_utils/_translation_utils/postprocess_translation.sh b/_utils/_translation_utils/postprocess_translation.sh
new file mode 100644
index 00000000..f775084e
--- /dev/null
+++ b/_utils/_translation_utils/postprocess_translation.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# $1 is lang
+# $2 is the directory where the translated files reside and where the language needs to be added to internal urls
+# $3 is file with internal urls that belong to files already translated and downloaded from transifex to be replaced with lang/url
+# where $3 is a file dumped by postprocess_translation.py
+# this script exists because it is easier to correctly process html code with sed; python messes it up.
+# example of invoking the script:
+# bash _utils/_translation_utils/postprocess_translation.sh de _translated/de/ _utils/translated_hrefs_urls.txt
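+#
+# e.g. if $3 contains the line '/doc/templates/' and $1 is 'de', every occurrence of
+# href="/doc/templates/" in $2 is rewritten to href="/de/doc/templates/"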
+
+
+pattern="href=\"\/"
+pattern_reset="href=\"\/"$1"\/"
+escaped_slash="\/"
+
+# find the patterns that contain href=/$lang pattern and reset
+find $2 -name '*.md' -or -name '*.html' | xargs sed -i "s/$pattern_reset/$pattern/g"
+
+while read line; do
+ # check for traversing patterns in $3: check if every line begins with /word
+    if [ -z "$(grep -oP '^(/(\w+))*' <<< "$line")" ]
+    then
+        echo "the string does not begin as it should"
+        exit 1
+ fi
+ #escape '/' with '\/'
+ l="${line//\//$escaped_slash}"
+ search_pattern="href=\""$l"\""
+ replace_pattern="href=\"\/"$1$l"\""
+
+ # search and destroy
+ find $2 -name '*.md' -or -name '*.html' | xargs sed -i "s/$search_pattern/$replace_pattern/g"
+done < $3
+
diff --git a/_utils/_translation_utils/prepare_for_translation.py b/_utils/_translation_utils/prepare_for_translation.py
new file mode 100644
index 00000000..d35b9ac6
--- /dev/null
+++ b/_utils/_translation_utils/prepare_for_translation.py
@@ -0,0 +1,128 @@
+#!/usr/bin/python3
+'''
+this script adds the lang and ref attributes (ref starting from the counter) to existing markdown files after the permalink line, recursively from a given root dir
+invocation: python prepare_for_translation.py en _doc/ ref_counter_file
+param1 is the language in short form
+param2 is a directory or a single file
+param3 is a file containing the value of the current reference counter, with exactly one line in the form of:
+current counter: x
+'''
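+# after processing, a file's frontmatter gains e.g. the attributes:
+#   lang: en
+#   ref: 251
+# where ref is the value of the incremented reference counter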
+from io import open as iopen
+from os.path import isfile
+import os
+from sys import exit
+from argparse import ArgumentParser
+from frontmatter import Post, load, dump
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+
+PERMALINK_KEY = 'permalink'
+REDIRECT_KEY = 'redirect_from'
+LANG_KEY = 'lang'
+REF_KEY = 'ref'
+FILENAME_EXTENSIONS = ['.png', '.svg', '.ico', '.jpg', '.css', '.scss', '.js', '.yml', '.sh', '.py', '.sed', '.dia', '.pdf', '.gif', '.eot', '.woff', '.ttf', '.otf', '.woff2', '.sig', '.json']
+
+def read_counter(counterfile):
+ if not isfile(counterfile):
+ print('check your files')
+ exit()
+ with iopen(counterfile,'r') as c:
+ counter_line = c.readline()
+ counter_a = counter_line.split('current counter: ')
+ return int(counter_a[1])
+
+def write_counter_to_file(counter, counterfile):
+ if not isfile(counterfile):
+ print('check your files')
+ exit()
+ with iopen(counterfile,'w') as c:
+ counter_line ='current counter: ' + str(counter)
+ c.writelines(counter_line)
+ c.truncate()
+
+def check_file_name(file_name):
+    return file_name[0] == '.' or any(file_name.endswith(t) for t in FILENAME_EXTENSIONS)
+
+def check_dir_name(dir_name):
+ return dir_name[0] == '.' or '/.' in dir_name
+
+
+def main(root_dir, lang, counter):
+ # if this is only a file
+ if os.path.isfile(root_dir):
+ if not check_file_name(root_dir):
+ with iopen(root_dir) as fp:
+ md = load(fp)
+ if not md.metadata:
+ return counter
+ # remove permalink in redirects if it is a list
+ if md.get(PERMALINK_KEY) != None and md.get(REDIRECT_KEY) != None and md[PERMALINK_KEY] in md[REDIRECT_KEY]:
+ redirects = md.get(REDIRECT_KEY)
+ if not isinstance(redirects, str):
+ md[REDIRECT_KEY].remove(md[PERMALINK_KEY])
+ if md.get(LANG_KEY) == None:
+ md[LANG_KEY] = lang
+ if md.get(REF_KEY) == None:
+ md[REF_KEY] = counter
+ counter += 1
+ with iopen(root_dir, 'wb') as replaced:
+ dump(md, replaced)
+ replaced.write(b'\n')
+
+ return counter
+
+ for dir_name, subdir_list, file_list in os.walk(root_dir):
+ print('current directory: %s' % dir_name)
+ print(os.path.basename(dir_name))
+
+ if check_dir_name(dir_name):
+ print('\t%s' % dir_name)
+        print('continue')
+ continue
+
+ for file_name in file_list:
+ print('\t%s' % file_name)
+ # lazy
+ if check_file_name(file_name):
+ print('continue')
+ continue
+ file_path = dir_name + "/" + file_name
+ with iopen(file_path) as fp:
+ md = load(fp)
+ if not md.metadata:
+ print('no metadata in %s' % file_path)
+ continue
+ # remove permalink in redirects if it is a list
+ if md.get(PERMALINK_KEY) != None and md.get(REDIRECT_KEY) != None and md[PERMALINK_KEY] in md[REDIRECT_KEY]:
+ redirects = md.get(REDIRECT_KEY)
+ if not isinstance(redirects, str):
+ md[REDIRECT_KEY].remove(md[PERMALINK_KEY])
+ if md.get(LANG_KEY) == None:
+ md[LANG_KEY] = "en"
+ if md.get(REF_KEY) == None:
+ md[REF_KEY] = counter
+ counter += 1
+
+ with iopen(file_path, 'wb') as replaced:
+ dump(md, replaced)
+ replaced.write(b'\n')
+
+ return counter
+
+
+
+if __name__ == '__main__':
+ parser = ArgumentParser()
+ parser.add_argument("language")
+ parser.add_argument("directory")
+ parser.add_argument("refcounterfile")
+ args = parser.parse_args()
+
+ counter_file = args.refcounterfile
+ counter = read_counter(counter_file)
+
+ print('\n CURRENT REF COUNTER IS %s' % counter)
+ ref_counter = main(args.directory, args.language, counter)
+
+ print('\n NEW CURRENT REF COUNTER IS %s' % ref_counter)
+ write_counter_to_file(ref_counter, counter_file)
diff --git a/_utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh b/_utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh
new file mode 100755
index 00000000..8ffbee43
--- /dev/null
+++ b/_utils/_translation_utils/prepare_tx_config_for_notranslate_tags.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# $1 is tx/config file
+# $2 filename to contain only the resources' names
+# $3 filename to contain only the source files' names
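+#
+# a .tx/config stanza is assumed to look roughly like:
+#   [qubes.doc_example]
+#   file_filter = _translated/<lang>/doc/example.md
+#   source_file = doc/example.md
+#   source_lang = en
+#   type = GITHUBMARKDOWN
+# the first sed chain keeps only the resource name (the part after the last dot of the
+# section header); the second keeps only the source_file lines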
+sed '/^$/d' $1 | sed '/^s/d' | sed '/^t/d' | sed '/^h/d' | sed '/^f/d' | sed '/\[main]/d' | sed 's/\[//' | sed 's/\]//' | sed 's/.*\.//' > $2
+sed '/^$/d' $1 | sed '/source_lang/d' | sed '/^t/d' | sed '/^h/d' | sed '/\[main]/d' | sed '/\[/d' | sed '/^f/d' > $3
diff --git a/_utils/_translation_utils/prepare_tx_config_postprocess.sh b/_utils/_translation_utils/prepare_tx_config_postprocess.sh
new file mode 100755
index 00000000..8f2cf10b
--- /dev/null
+++ b/_utils/_translation_utils/prepare_tx_config_postprocess.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# $1 is .tx/config file
+# $2 the new mapping file to be used by postprocess_translation.py
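+# the mapping file keeps only the file_filter and source_file lines, e.g.:
+#   file_filter = _translated/<lang>/doc/example.md
+#   source_file = doc/example.md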
+sed '/^$/d' $1 | sed '/source_lang/d' | sed '/^t/d' | sed '/^h/d' | sed '/\[main]/d' | sed '/\[/d' > $2
+#sed -i 's/aux\/news_strings.yml/news\/index.html/g' $2
+#sed -i 's/aux\/news_categories_strings.yml/news\/categories\/index.html/g' $2
+
diff --git a/_utils/_translation_utils/remove_obsolete_files.py b/_utils/_translation_utils/remove_obsolete_files.py
new file mode 100644
index 00000000..44743f8f
--- /dev/null
+++ b/_utils/_translation_utils/remove_obsolete_files.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+
+import argparse
+import os
+import sys
+
+parser = argparse.ArgumentParser()
+parser.add_argument('lang')
+parser.add_argument('translation_dir')
+parser.add_argument('tx_mapping')
+
+def main():
+ args = parser.parse_args()
+
+ valid_files = set()
+ with open(args.tx_mapping) as f_mapping:
+ for line in f_mapping.readlines():
+ if line.startswith('file_filter = '):
+            valid_files.add(line.strip().split(' = ')[1].replace('<lang>', args.lang))
+
+ if not valid_files:
+ print('No files found in {}, aborting!'.format(args.tx_mapping))
+ return 1
+
+ existing_files = set()
+ for dirpath, dirs, files in os.walk(args.translation_dir):
+ existing_files.update(os.path.join(dirpath, name) for name in files)
+
+ if not existing_files:
+ print('No files found in {}, aborting!'.format(args.translation_dir))
+ return 1
+
+ for obsolete in existing_files.difference(valid_files):
+ print('Removing {}'.format(obsolete))
+ os.unlink(obsolete)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
+
diff --git a/_utils/_translation_utils/requirements_notranslate.txt b/_utils/_translation_utils/requirements_notranslate.txt
new file mode 100644
index 00000000..849dd0f4
--- /dev/null
+++ b/_utils/_translation_utils/requirements_notranslate.txt
@@ -0,0 +1,10 @@
+attrs==19.3.0
+certifi==2019.11.28
+importlib-metadata==1.5.0
+jsonschema==3.2.0
+pycurl==7.43.0.5
+pyrsistent==0.15.7
+python-frontmatter==0.5.0
+PyYAML==5.3
+six==1.14.0
+zipp==3.1.0
diff --git a/_utils/_translation_utils/tag_strings_as_locked.py b/_utils/_translation_utils/tag_strings_as_locked.py
new file mode 100644
index 00000000..b4830408
--- /dev/null
+++ b/_utils/_translation_utils/tag_strings_as_locked.py
@@ -0,0 +1,399 @@
+#!/usr/bin/python3
+'''
+invoke: python tag_strings_as_locked.py tx-resource-names.txt tx-sources-filenames.txt api-token --debug --manual
+param1: tx-resource-names.txt: provide the file from tx configuration containing the resource names
+param2: tx-sources-filenames.txt: provide the file from tx configuration containing only the original source filenames
+param3: api-token: provide the developer api transifex token for auth
+param4: debug: whether or not to write debug json files
+param5: manual: whether or not to tag file by file, waiting for a keyboard input between files
+'''
+from pycurl import Curl, HTTP_CODE, error, WRITEFUNCTION
+from frontmatter import Post, load
+from certifi import where
+from io import BytesIO
+from io import open as iopen
+from os import environ
+from os.path import isfile
+from re import match
+import sys
+from sys import exit
+from argparse import ArgumentParser
+from json import loads, dumps
+from jsonschema import validate
+from jsonschema.exceptions import ValidationError
+from collections import deque
+from logging import basicConfig, getLogger, DEBUG, Formatter, FileHandler
+
+# TODO should we also mark notranslate strings as reviewed? that may need manual labor afterwards though
+# Note: this snippet of code should be extended if the data files are altered and there are parts that have to stay the same.
+# This can be done by fetching the strings from the tx api via curl:
+# curl -i -L --user api:XXXXXXXXXXXXXXX -X GET https://www.transifex.com/api/2/project/qubes/resource/no_html_data_hcl/translation/en/strings/
+# and searching for the key pattern that should be marked as locked and thus untranslatable and immutable
+
+KEY_REGEX_LOCK_PATTERNS = ['^\[(\d)*\](.sub-pages.)\[(\d)*\](.url)$',
+ '^\[(\d)*\](.sub-pages.)\[(\d)*\](.sub-pages.)\[(\d)*\](.url)$',
+ '^\[(\d)*\](.sub-pages.)\[(\d)*\](.icon)$',
+ '^(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.icon)$','^(\[(\d)*\])(.category)$',
+ '^(\[(\d)*\])(.tech.)(\[(\d)*\])(.img)$', '^(\[(\d)*\])(.tech.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.award.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.award.)(\[(\d)*\])(.img)$',
+ '^(\[(\d)*\])(.media.)(\[(\d)*\])(.img)$', '^(\[(\d)*\])(.media.)(\[(\d)*\])(.article)$',
+ '^(\[(\d)*\])(.attachment)$', '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.tweet)$',
+ '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.avatar)$', '^(\[(\d)*\])(.expert.)(\[(\d)*\])(.img)$',
+ '^(\[(\d)*\])(.htmlsection)$', '^(\[(\d)*\])(.folder)$','redirect_from.\[(\d)*\]',
+ '^(\[(\d)*\])(.links.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.links.)(\[(\d)*\])(.id)$',
+ '^(\[(\d)*\])(.columns.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.htmlsections.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.id)$', '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.img)$',
+ '^(\[(\d)*\])(.partners.)(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.paragraphs.)(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.list.)(\[(\d)*\])(.item.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.releases.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.note.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.htmlsections.)(\[(\d)*\])(.htmlsection)$',
+ '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.subsections.)(\[(\d)*\])(.section.)(\[(\d)*\])(.url)$',
+ '^(\[(\d)*\])(.paragraph.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$',
+ '^categories.(\[(\d)*\])(.slug)$', '^papers.(\[(\d)*\])(.title)$','^papers.(\[(\d)*\])(.author)$', '^papers.(\[(\d)*\])(.url)$', '^papers.(\[(\d)*\])(.category)$',
+ '^(\[(\d)*\])(.name)$', '^(\[(\d)*\])(.type)$', '^(\[(\d)*\])(.picture)$', '^(\[(\d)*\])(.email)$', '^(\[(\d)*\])(.fingerprint)$', '^(\[(\d)*\])(.github)$', '^(\[(\d)*\])(.website)$',
+ '^(\[(\d)*\])(.section.)(\[(\d)*\])(.snippets.)(\[(\d)*\])(.url)$', '^(\[(\d)*\])(.section)$',
+ '^(\[(\d)*\])(.releases.)(\[(\d)*\])(.r_version)$', '^(\[(\d)*\])(.section.)(\[(\d)*\])(.note)$']
+
+KEY_REGEX_PATTERNS = ['^\[\d\](.sub-pages.)\[\d\](.url)$', '^\[\d\](.sub-pages.)\[\d\](.sub-pages.)\[\d\](.url)$', '^\[\d\](.sub-pages.)\[\d\](.icon)$', '^(\[\d\])(.url)$', '^(\[\d\])(.icon)$','^(\[\d\])(.category)$', '^(\[\d\])(.tech.)(\[\d\])(.img)$', '^(\[\d\])(.tech.)(\[\d\])(.url)$', '^(\[\d\])(.award.)(\[\d\])(.url)$', '^(\[\d\])(.award.)(\[\d\])(.img)$', '^(\[\d\])(.media.)(\[\d\])(.img)$', '^(\[\d\])(.media.)(\[\d\])(.article)$', '^(\[\d\])(.attachment)$', '^(\[\d\])(.expert.)(\[\d\])(.tweet)$', '^(\[\d\])(.expert.)(\[\d\])(.avatar)$', '^(\[\d\])(.expert.)(\[\d\])(.img)$', '^(\[\d\])(.htmlsection)$', '^(\[\d\])(.folder)$','redirect_from.\[\d\]', '^(\[\d\])(.links.)(\[\d\])(.url)$', '^(\[\d\])(.links.)(\[\d\])(.id)$']
+
+KEY_PATTERNS = ['lang', 'layout', 'permalink', 'redirect_from']
+SOURCE_PATTERNS = ['* * * * *', '']
+# TODO use re2 by default?
+# examples for the first regex:
+# ![edit-button-mobile](/attachment/wiki/doc-edit/02-button1.png)
+# ![commit](/attachment/wiki/doc-gel/07-commit-msg.png)
+# for the second one a liquid include html line
+
+SOURCE_REGEX_PATTERNS = [
+ '^\!\[(\w{0,50}(-){0,50}(\.){0,2}\w{0,50}){0,10}\]\((\/(\w{0,50}(-){0,8}\w{0,50})){0,10}(\w{0,50}(-){0,50}\w{0,50}){0,10}.\w{0,10}\)',
+ '{%[^\S\n]{1,8}include[\s\w-]*\.html[^\S\n]{1,8}%}'
+ ]
+START_END_PATTERNS = {'{%': '%}', '{{': '}}', '<': '>', '[':']', '