WycliffeAssociates · 16dprice · Jan 25, 2021 · Jan 25, 2021 · Jun 2, 2021 · Jun 2, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+# IDE
+*.swp
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/libraries/client/preprocessors.py b/libraries/client/preprocessors.py
@@ -1,13 +1,15 @@
 from __future__ import unicode_literals, print_function
 import os
 import re
+import json
 from glob import glob
 from shutil import copy
 from libraries.app.app import App
 from libraries.door43_tools.bible_books import BOOK_NUMBERS, BOOK_NAMES, BOOK_CHAPTER_VERSES
 from libraries.general_tools.file_utils import write_file, read_file
 from libraries.resource_container.ResourceContainer import RC
 from converters import txt2md
+from libraries.wa_catalog_utils.utils import Utils
 
 
 def do_preprocess(rc, repo_dir, output_dir):
@@ -183,6 +185,7 @@ def run(self):
                 for chapter in self.get_chapters(project_path):
                     markdown = '# {0}\n\n'.format(chapter['title'])
                     for frame in chapter['frames']:
+                        # TODO: will most likely need to change this but can't test this as of 6/3/21
                         markdown += '![Frame {0}](https://cdn.door43.org/obs/jpg/360px/obs-en-{0}.jpg)\n\n' \
                             .format(frame.get('id'))
                         markdown += frame['text'] + '\n\n'
@@ -415,29 +418,32 @@ def run(self):
                                                                                              project.identifier)))
         return True
 
+
     def fix_links(self, content):
-        # convert RC links, e.g. rc://en/tn/help/1sa/16/02 => https://git.door43.org/Door43/en_tn/1sa/16/02.md
-        content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s\p{P})\]\n$]+)',
-                         r'https://git.door43.org/Door43/\1_\2/src/master/\4.md', content, flags=re.IGNORECASE)
         # fix links to other sections within the same manual (only one ../ and a section name)
         # e.g. [Section 2](../section2/01.md) => [Section 2](#section2)
         content = re.sub(r'\]\(\.\./([^/)]+)/01.md\)', r'](#\1)', content)
+
         # fix links to other manuals (two ../ and a manual name and a section name)
         # e.g. [how to translate](../../translate/accurate/01.md) => [how to translate](translate.html#accurate)
         for idx, project in enumerate(self.rc.projects):
             project_path_basename = os.path.basename(project.path)
             pattern = re.compile(r'\]\(\.\./\.\./{0}/([^/)]+)/01.md\)'.format(project_path_basename))
             replace = r']({0}-{1}.html#\1)'.format(str(idx+1).zfill(2), project.identifier)
             content = re.sub(pattern, replace, content)
+
         # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
         # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
         content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)
+
         # convert URLs to links if not already
         content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
                          content, flags=re.IGNORECASE)
+
         # URLS wth just www at the start, no http
         content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                          content, flags=re.IGNORECASE)
+
         return content
 
 
@@ -562,33 +568,44 @@ def run(self):
         return True
 
     def fix_links(self, content, section):
-        # convert tA RC links, e.g. rc://en/ta/man/translate/figs-euphemism => https://git.door43.org/Door43/en_ta/translate/figs-euphemism/01.md
-        content = re.sub(r'rc://([^/]+)/ta/([^/]+)/([^\s)\]\n$]+)',
-                         r'https://git.door43.org/Door43/\1_ta/src/master/\3/01.md', content,
-                         flags=re.IGNORECASE)
+        # convert tA RC links, e.g. rc://en/ta/man/translate/figs-euphemism =>
+        # https://content.bibletranslationtools.org/WycliffeAssociates/en_tm/src/branch/master/jit/figs-euphemism/01.md
+        content = re.sub(
+            r'rc://([^/]+)/ta/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
+            r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_tm/src/branch/master/jit/\4/01.md',
+            content,
+            flags=re.IGNORECASE
+        )
+
         # convert other RC links, e.g. rc://en/tn/help/1sa/16/02 => https://git.door43.org/Door43/en_tn/1sa/16/02.md
         content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
-                         r'https://git.door43.org/Door43/\1_\2/src/master/\4.md', content,
+                         r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_\2/src/master/\4.md', content,
                          flags=re.IGNORECASE)
+
         # fix links to other sections within the same manual (only one ../ and a section name that matches section_link)
         # e.g. [covenant](../kt/covenant.md) => [covenant](#covenant)
         pattern = r'\]\(\.\.\/{0}\/([^/]+).md\)'.format(section)
         content = re.sub(pattern, r'](#\1)', content)
+
         # fix links to other sections within the same manual (only one ../ and a section name)
         # e.g. [commit](../other/commit.md) => [commit](other.html#commit)
         for s in TwPreprocessor.section_titles:
             pattern = re.compile(r'\]\(\.\./{0}/([^/]+).md\)'.format(s))
             replace = r']({0}.html#\1)'.format(s)
             content = re.sub(pattern, replace, content)
+
         # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
         # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
         content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)
+
         # convert URLs to links if not already
         content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
                          content, flags=re.IGNORECASE)
+
         # URLS wth just www at the start, no http
         content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                          content, flags=re.IGNORECASE)
+
         return content
 
 
@@ -599,6 +616,8 @@ class TnPreprocessor(Preprocessor):
         'book_codes': {}
     }
 
+    language_commit_number_dict = {}
+
     def __init__(self, *args, **kwargs):
         super(TnPreprocessor, self).__init__(*args, **kwargs)
         self.books = []
@@ -685,22 +704,70 @@ def move_to_front(self, files, move_str):
                 files.pop()
                 files.insert(0, last_file)
 
+    @staticmethod
+    def get_book_number(book_slug):
+        """Look up book_slug in resources/books.json (beside this module) and return its 'num' value."""
+        module_dir = os.path.dirname(os.path.realpath(__file__))
+        with open(module_dir + '/resources/books.json') as books_file:
+            books = json.load(books_file)
+        return books[book_slug]['num']
+
+    def fix_rc_links(self, content):
+        """
+        Convert tN help links, e.g. rc://fr/tn/help/1co/08/14, into read.bibletranslationtools.org URLs.
+
+        Each match is rewritten on its own through a re.sub callback, so other rc:// links and literal
+        braces in content are never touched. A link whose book slug is not in books.json is left as is.
+
+        :param content: markdown text to scan
+        :return: content with every matching rc:// help link replaced
+        """
+        # matches on rc links like rc://fr/tn/help/1co/08/14
+        rc_link_values_pattern = r'rc://([^/]+)/([^/]+)/help/([^/]+)/([^/]+)/([\d]+)'
+        catalog_utils = Utils()
+
+        def to_catalog_url(match):
+            language, resource, book_slug, chapter, verse = match.groups()
+            try:
+                book_number = self.get_book_number(book_slug)
+            except KeyError:
+                return match.group(0)  # unknown book slug: keep the original link text
+            commit_number = catalog_utils.get_wa_catalog_commit_number('{}_{}'.format(language, resource))
+            return 'http://read.bibletranslationtools.org/u/WA-Catalog/{}_{}/{}/{}-{}.html#tn-chunk-{}-{}-{}'.format(
+                language, resource, commit_number, book_number, book_slug.upper(), book_slug,
+                chapter.zfill(3), verse.zfill(3))
+
+        return re.sub(rc_link_values_pattern, to_catalog_url, content, flags=re.IGNORECASE)
+
     def fix_links(self, content):
-        # convert tA RC links, e.g. rc://en/ta/man/translate/figs-euphemism => https://git.door43.org/Door43/en_ta/translate/figs-euphemism/01.md
-        content = re.sub(r'rc://([^/]+)/ta/([^/]+)/([^\s)\]\n$]+)',
-                         r'https://git.door43.org/Door43/\1_ta/src/master/\3/01.md', content,
-                         flags=re.IGNORECASE)
+        # convert tA RC links, e.g. rc://en/ta/man/translate/figs-euphemism =>
+        # https://content.bibletranslationtools.org/WycliffeAssociates/en_tm/src/branch/master/jit/figs-euphemism/01.md
+        content = re.sub(
+            r'rc://([^/]+)/ta/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
+            r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_tm/src/branch/master/jit/\4/01.md',
+            content,
+            flags=re.IGNORECASE
+        )
+
+        # convert RC links, e.g. rc://en/tn/help/rut/04/14 =>
+        # http://read.bibletranslationtools.org/u/WA-Catalog/en_tn/ccdb2a707b/8-RUT.html#tn-chunk-rut-004-014
+        content = self.fix_rc_links(content)
+
         # convert other RC links, e.g. rc://en/tn/help/1sa/16/02 => https://git.door43.org/Door43/en_tn/1sa/16/02.md
         content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
-                         r'https://git.door43.org/Door43/\1_\2/src/master/\4.md', content,
+                         r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_\2/src/master/\4.md', content,
                          flags=re.IGNORECASE)
+
         # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
         # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
         content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)
+
         # convert URLs to links if not already
         content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
                          content, flags=re.IGNORECASE)
+
         # URLS wth just www at the start, no http
         content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                          content, flags=re.IGNORECASE)
+
         return content
diff --git a/libraries/client/resources/books.json b/libraries/client/resources/books.json
@@ -0,0 +1,68 @@
+{
+  "gen": { "name": "Genesis", "num": 1, "anth": "ot" },
+  "exo": { "name": "Exodus", "num": 2, "anth": "ot" },
+  "lev": { "name": "Leviticus", "num": 3, "anth": "ot" },
+  "num": { "name": "Numbers", "num": 4, "anth": "ot" },
+  "deu": { "name": "Deuteronomy", "num": 5, "anth": "ot" },
+  "jos": { "name": "Joshua", "num": 6, "anth": "ot" },
+  "jdg": { "name": "Judges", "num": 7, "anth": "ot" },
+  "rut": { "name": "Ruth", "num": 8, "anth": "ot" },
+  "1sa": { "name": "1 Samuel", "num": 9, "anth": "ot" },
+  "2sa": { "name": "2 Samuel", "num": 10, "anth": "ot" },
+  "1ki": { "name": "1 Kings", "num": 11, "anth": "ot" },
+  "2ki": { "name": "2 Kings", "num": 12, "anth": "ot" },
+  "1ch": { "name": "1 Chronicles", "num": 13, "anth": "ot" },
+  "2ch": { "name": "2 Chronicles", "num": 14, "anth": "ot" },
+  "ezr": { "name": "Ezra", "num": 15, "anth": "ot" },
+  "neh": { "name": "Nehemiah", "num": 16, "anth": "ot" },
+  "est": { "name": "Esther", "num": 17, "anth": "ot" },
+  "job": { "name": "Job", "num": 18, "anth": "ot" },
+  "psa": { "name": "Psalms", "num": 19, "anth": "ot" },
+  "pro": { "name": "Proverbs", "num": 20, "anth": "ot" },
+  "ecc": { "name": "Ecclesiastes", "num": 21, "anth": "ot" },
+  "sng": { "name": "Song of Solomon", "num": 22, "anth": "ot" },
+  "isa": { "name": "Isaiah", "num": 23, "anth": "ot" },
+  "jer": { "name": "Jeremiah", "num": 24, "anth": "ot" },
+  "lam": { "name": "Lamentations", "num": 25, "anth": "ot" },
+  "ezk": { "name": "Ezekiel", "num": 26, "anth": "ot" },
+  "dan": { "name": "Daniel", "num": 27, "anth": "ot" },
+  "hos": { "name": "Hosea", "num": 28, "anth": "ot" },
+  "jol": { "name": "Joel", "num": 29, "anth": "ot" },
+  "amo": { "name": "Amos", "num": 30, "anth": "ot" },
+  "oba": { "name": "Obadiah", "num": 31, "anth": "ot" },
+  "jon": { "name": "Jonah", "num": 32, "anth": "ot" },
+  "mic": { "name": "Micah", "num": 33, "anth": "ot" },
+  "nam": { "name": "Nahum", "num": 34, "anth": "ot" },
+  "hab": { "name": "Habakkuk", "num": 35, "anth": "ot" },
+  "zep": { "name": "Zephaniah", "num": 36, "anth": "ot" },
+  "hag": { "name": "Haggai", "num": 37, "anth": "ot" },
+  "zec": { "name": "Zechariah", "num": 38, "anth": "ot" },
+  "mal": { "name": "Malachi", "num": 39, "anth": "ot" },
+  "mat": { "name": "Matthew", "num": 41, "anth": "nt" },
+  "mrk": { "name": "Mark", "num": 42, "anth": "nt" },
+  "luk": { "name": "Luke", "num": 43, "anth": "nt" },
+  "jhn": { "name": "John", "num": 44, "anth": "nt" },
+  "act": { "name": "Acts", "num": 45, "anth": "nt" },
+  "rom": { "name": "Romans", "num": 46, "anth": "nt" },
+  "1co": { "name": "1 Corinthians", "num": 47, "anth": "nt" },
+  "2co": { "name": "2 Corinthians", "num": 48, "anth": "nt" },
+  "gal": { "name": "Galatians", "num": 49, "anth": "nt" },
+  "eph": { "name": "Ephesians", "num": 50, "anth": "nt" },
+  "php": { "name": "Philippians", "num": 51, "anth": "nt" },
+  "col": { "name": "Colossians", "num": 52, "anth": "nt" },
+  "1th": { "name": "1 Thessalonians", "num": 53, "anth": "nt" },
+  "2th": { "name": "2 Thessalonians", "num": 54, "anth": "nt" },
+  "1ti": { "name": "1 Timothy", "num": 55, "anth": "nt" },
+  "2ti": { "name": "2 Timothy", "num": 56, "anth": "nt" },
+  "tit": { "name": "Titus", "num": 57, "anth": "nt" },
+  "phm": { "name": "Philemon", "num": 58, "anth": "nt" },
+  "heb": { "name": "Hebrews", "num": 59, "anth": "nt" },
+  "jas": { "name": "James", "num": 60, "anth": "nt" },
+  "1pe": { "name": "1 Peter", "num": 61, "anth": "nt" },
+  "2pe": { "name": "2 Peter", "num": 62, "anth": "nt" },
+  "1jn": { "name": "1 John", "num": 63, "anth": "nt" },
+  "2jn": { "name": "2 John", "num": 64, "anth": "nt" },
+  "3jn": { "name": "3 John", "num": 65, "anth": "nt" },
+  "jud": { "name": "Jude", "num": 66, "anth": "nt" },
+  "rev": { "name": "Revelation", "num": 67, "anth": "nt"}
+}
diff --git a/libraries/linters/tn_linter.py b/libraries/linters/tn_linter.py
@@ -5,6 +5,7 @@
 from libraries.door43_tools.bible_books import BOOK_NUMBERS
 from libraries.general_tools import file_utils
 from libraries.linters.markdown_linter import MarkdownLinter
+from libraries.wa_catalog_utils.utils import Utils
 
 
 class TnLinter(MarkdownLinter):
@@ -78,17 +79,19 @@ def find_invalid_links(self, folder, f, contents):
                 file_path_abs = os.path.abspath(file_path)
                 exists = os.path.exists(file_path_abs)
                 if not exists:
-                    a = self.get_file_link(f, folder)
+                    a = self.get_file_link(f)
                     msg = "{0}: contains invalid link: ({1})".format(a, link)
                     self.log.warnings.append(msg)
                     App.logger.debug(msg)
 
-    def get_file_link(self, f, folder):
-        parts = folder.split(self.source_dir)
-        sub_path = self.source_dir  # default
-        if len(parts) == 2:
-            sub_path = parts[1][1:]
-        url = "https://git.door43.org/{0}/{1}/src/master/{2}/{3}".format(self.repo_owner, self.repo_name,
-                                                                         sub_path, f)
-        a = '<a href="{0}">{1}/{2}</a>'.format(url, sub_path, f)
+    def get_file_link(self, f):
+        """Build an <a> tag linking file f (".md" stripped) to this repo's read.bibletranslationtools.org page."""
+        commit_number = Utils().get_wa_catalog_commit_number(self.repo_name)
+        f = re.sub(r'\.md$', '', f, flags=re.IGNORECASE)  # strip only a real trailing ".md" extension
+
+        url = "https://read.bibletranslationtools.org/u/WycliffeAssociates/{}/{}/#{}".format(
+            self.repo_name, commit_number, f
+        )
+        a = '<a href="{0}">{1}</a>'.format(url, f)
+
         return a
diff --git a/libraries/linters/tw_linter.py b/libraries/linters/tw_linter.py
@@ -4,13 +4,16 @@
 from libraries.app.app import App
 from libraries.general_tools import file_utils
 from libraries.linters.markdown_linter import MarkdownLinter
+from libraries.wa_catalog_utils.utils import Utils
 
 
 class TwLinter(MarkdownLinter):
 
     # match links of form '](link)'
     link_marker_re = re.compile(r'\]\(([^\n()]+)\)', re.UNICODE)
 
+    language_commit_number_dict = {}
+
     def lint(self):
         """
         Checks for issues with translationWords
@@ -43,17 +46,19 @@ def find_invalid_links(self, folder, f, contents):
                 file_path_abs = os.path.abspath(file_path)
                 exists = os.path.exists(file_path_abs)
                 if not exists:
-                    a = self.get_file_link(f, folder)
+                    a = self.get_file_link(f)
                     msg = "{0}: contains invalid link: ({1})".format(a, link)
                     self.log.warnings.append(msg)
                     App.logger.debug(msg)
 
-    def get_file_link(self, f, folder):
-        parts = folder.split(self.source_dir)
-        sub_path = self.source_dir  # default
-        if len(parts) == 2:
-            sub_path = parts[1][1:]
-        url = "https://git.door43.org/{0}/{1}/src/master/{2}/{3}".format(self.repo_owner, self.repo_name,
-                                                                         sub_path, f)
-        a = '<a href="{0}">{1}/{2}</a>'.format(url, sub_path, f)
+    def get_file_link(self, f):
+        """Build an <a> tag linking file f (".md" stripped) to this repo's read.bibletranslationtools.org page."""
+        commit_number = Utils().get_wa_catalog_commit_number(self.repo_name)
+        f = re.sub(r'\.md$', '', f, flags=re.IGNORECASE)  # strip only a real trailing ".md" extension
+
+        url = "https://read.bibletranslationtools.org/u/WycliffeAssociates/{}/{}/#{}".format(
+            self.repo_name, commit_number, f
+        )
+        a = '<a href="{0}">{1}</a>'.format(url, f)
+
         return a
diff --git a/libraries/wa_catalog_utils/__init__.py b/libraries/wa_catalog_utils/__init__.py
diff --git a/libraries/wa_catalog_utils/utils.py b/libraries/wa_catalog_utils/utils.py
@@ -0,0 +1,34 @@
+import requests
+import re
+
+
+class Utils:
+    """
+    Looks up the commit number that the WA-Catalog reader site currently serves for a repository.
+    """
+
+    catalog_url = 'http://read.bibletranslationtools.org/u/WA-Catalog/'
+    commit_number_pattern = r'http[A-Z0-9./:_\-]+/([A-Z0-9]{10})/$'
+
+    # class-level cache shared by every Utils instance: '<language>_<format>' -> commit number
+    language_commit_number_dict = {}
+
+    def get_wa_catalog_commit_number(self, language_and_format):
+        """
+        Return the commit number for a catalog repository, e.g. 'en_tn'.
+
+        The repository page is requested and the 10-character path segment at the end of the
+        response URL is taken as the commit number. Results are cached on the class, so a
+        repository is only requested the first time it is looked up. If the response URL does not
+        match commit_number_pattern, re.sub leaves it unchanged and the whole URL is returned and cached.
+
+        :param language_and_format: repository name such as 'en_tn'
+        :return: the commit number string
+        """
+        cache = self.language_commit_number_dict
+        if language_and_format not in cache:
+            # catalog_url already ends with '/', so strip it to avoid requesting '.../WA-Catalog//<repo>'
+            res = requests.get('{}/{}'.format(self.catalog_url.rstrip('/'), language_and_format))
+            cache[language_and_format] = re.sub(self.commit_number_pattern, r'\1', res.url, flags=re.IGNORECASE)
+
+        return cache[language_and_format]
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,7 @@
 requests==2.13.0
 responses==0.5.1
 boto3==1.4.4
+botocore==1.12.253
 beautifulsoup4==4.6.0
 gogs_client==1.0.3
 coveralls==1.1
@@ -11,6 +12,7 @@ future==0.16.0
 pyparsing==2.1.10
 usfm-tools==0.0.22
 mock==2.0.0
+watchdog==0.10.6
 sphinx==1.5.2
 sphinx-autobuild==0.6.0
 sphinx-rtd-theme==0.1.9