Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix various instances of d43 links #17

Open
wants to merge 19 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# IDE
*.swp

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
93 changes: 80 additions & 13 deletions libraries/client/preprocessors.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from __future__ import unicode_literals, print_function
import os
import re
import json
from glob import glob
from shutil import copy
from libraries.app.app import App
from libraries.door43_tools.bible_books import BOOK_NUMBERS, BOOK_NAMES, BOOK_CHAPTER_VERSES
from libraries.general_tools.file_utils import write_file, read_file
from libraries.resource_container.ResourceContainer import RC
from converters import txt2md
from libraries.wa_catalog_utils.utils import Utils


def do_preprocess(rc, repo_dir, output_dir):
Expand Down Expand Up @@ -183,6 +185,7 @@ def run(self):
for chapter in self.get_chapters(project_path):
markdown = '# {0}\n\n'.format(chapter['title'])
for frame in chapter['frames']:
# TODO: will most likely need to change this but can't test this as of 6/3/21
markdown += '![Frame {0}](https://cdn.door43.org/obs/jpg/360px/obs-en-{0}.jpg)\n\n' \
.format(frame.get('id'))
markdown += frame['text'] + '\n\n'
Expand Down Expand Up @@ -415,29 +418,32 @@ def run(self):
project.identifier)))
return True


def fix_links(self, content):
    """
    Rewrite the links found in a manual page so they work in the generated HTML.

    The substitutions run in a fixed order, because later rules rely on the
    output of earlier ones (for example, converted RC links are turned into
    markdown links by the bare-URL rule near the end).

    :param content: markdown text of one page
    :return: the text with RC links, relative links and bare URLs rewritten
    """
    # convert RC links, e.g. rc://en/tn/help/1sa/16/02 =>
    # https://git.door43.org/Door43/en_tn/src/master/1sa/16/02.md
    # The path stops at whitespace, ')', ']' or '$'. Python's `re` has no
    # \p{P} property class: Python 3 rejects it and Python 2 reads it as the
    # literal characters "p{P}", which left links containing a "p" (php, psa,
    # eph ...) unconverted. The class below matches the one used by the other
    # fix_links methods in this module.
    content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
                     r'https://git.door43.org/Door43/\1_\2/src/master/\4.md', content, flags=re.IGNORECASE)

    # fix links to other sections within the same manual (only one ../ and a section name)
    # e.g. [Section 2](../section2/01.md) => [Section 2](#section2)
    content = re.sub(r'\]\(\.\./([^/)]+)/01.md\)', r'](#\1)', content)

    # fix links to other manuals (two ../ and a manual name and a section name)
    # e.g. [how to translate](../../translate/accurate/01.md) => [how to translate](01-translate.html#accurate)
    # The target file name is the 1-based, zero-padded project index followed
    # by the project identifier.
    for idx, project in enumerate(self.rc.projects):
        project_path_basename = os.path.basename(project.path)
        pattern = re.compile(r'\]\(\.\./\.\./{0}/([^/)]+)/01.md\)'.format(project_path_basename))
        replace = r']({0}-{1}.html#\1)'.format(str(idx+1).zfill(2), project.identifier)
        content = re.sub(pattern, replace, content)

    # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
    # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
    content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)

    # convert URLs to links if not already
    content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
                     content, flags=re.IGNORECASE)

    # URLs with just www at the start, no http
    content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                     content, flags=re.IGNORECASE)

    return content


Expand Down Expand Up @@ -562,33 +568,44 @@ def run(self):
return True

def fix_links(self, content, section):
    """
    Rewrite the links found in a translationWords page so they work in the generated HTML.

    The substitutions run in a fixed order: the tA rule must run before the
    generic RC rule (which would otherwise claim tA links too), and the
    bare-URL rule near the end wraps the converted URLs in markdown links.

    :param content: markdown text of one page
    :param section: name of the section the page belongs to; links into this
        section become in-page anchors
    :return: the text with RC links, relative links and bare URLs rewritten
    """
    # convert tA RC links, e.g. rc://en/ta/man/translate/figs-euphemism =>
    # https://content.bibletranslationtools.org/WycliffeAssociates/en_tm/src/branch/master/jit/figs-euphemism/01.md
    # Only the language (group 1) and the article path (group 4) are used;
    # every link is pointed at the "jit" folder regardless of the manual named
    # in the link.
    content = re.sub(
        r'rc://([^/]+)/ta/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
        r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_tm/src/branch/master/jit/\4/01.md',
        content,
        flags=re.IGNORECASE
    )

    # convert other RC links, e.g. rc://en/tn/help/1sa/16/02 =>
    # https://content.bibletranslationtools.org/WycliffeAssociates/en_tn/src/master/1sa/16/02.md
    # (the superseded git.door43.org replacement used to be passed in the same
    # call, which made re.sub fail on its argument list)
    content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
                     r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_\2/src/master/\4.md', content,
                     flags=re.IGNORECASE)

    # fix links to other sections within the same manual (only one ../ and a section name that matches section_link)
    # e.g. [covenant](../kt/covenant.md) => [covenant](#covenant)
    pattern = r'\]\(\.\.\/{0}\/([^/]+).md\)'.format(section)
    content = re.sub(pattern, r'](#\1)', content)

    # fix links to other sections within the same manual (only one ../ and a section name)
    # e.g. [commit](../other/commit.md) => [commit](other.html#commit)
    for s in TwPreprocessor.section_titles:
        pattern = re.compile(r'\]\(\.\./{0}/([^/]+).md\)'.format(s))
        replace = r']({0}.html#\1)'.format(s)
        content = re.sub(pattern, replace, content)

    # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
    # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
    content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)

    # convert URLs to links if not already
    content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
                     content, flags=re.IGNORECASE)

    # URLs with just www at the start, no http
    content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                     content, flags=re.IGNORECASE)

    return content


Expand All @@ -599,6 +616,8 @@ class TnPreprocessor(Preprocessor):
'book_codes': {}
}

language_commit_number_dict = {}

def __init__(self, *args, **kwargs):
super(TnPreprocessor, self).__init__(*args, **kwargs)
self.books = []
Expand Down Expand Up @@ -685,22 +704,70 @@ def move_to_front(self, files, move_str):
files.pop()
files.insert(0, last_file)

@staticmethod
def get_book_number(book_slug):
    """
    Look up the number stored for a book in the bundled ``resources/books.json``.

    :param book_slug: key of the book in books.json, e.g. ``rut``
    :return: the value of that entry's ``num`` field
    :raises KeyError: if books.json has no entry for ``book_slug``
    """
    # books.json lives in the "resources" folder next to this module
    module_dir = os.path.dirname(os.path.realpath(__file__))
    with open(module_dir + '/resources/books.json') as books_file:
        books = json.load(books_file)

    book_entry = books[book_slug]
    return book_entry['num']

def fix_rc_links(self, content):
    """
    Rewrite translationNotes verse links to their published WA-Catalog pages.

    e.g. rc://en/tn/help/rut/04/14 =>
    http://read.bibletranslationtools.org/u/WA-Catalog/en_tn/<commit>/8-RUT.html#tn-chunk-rut-004-014

    Only links of the form rc://<lang>/<resource>/help/<book>/<chapter>/<verse>
    are touched. A link whose book is not listed in books.json is left as it
    is, so a later rule can still handle it.

    :param content: markdown text of one page
    :return: the text with every matching RC link replaced
    """
    # A single pattern both finds and replaces each link, so the values used
    # to build a URL always come from the link being rewritten. No \p{P} here:
    # Python's `re` module does not support Unicode property classes.
    rc_link_pattern = re.compile(
        r'rc://([^/\s]+)/([^/\s]+)/help/([^/\s]+)/([^/\s]+)/(\d+)',
        flags=re.IGNORECASE
    )

    def to_catalog_url(match):
        language, resource, book_slug, chapter, verse = match.groups()
        # books.json keys are lower case; the pattern itself is case-insensitive
        book_slug = book_slug.lower()
        try:
            book_number = self.get_book_number(book_slug)
        except KeyError:
            # unknown book: keep the original link text
            return match.group(0)

        # Utils caches the commit number per repository at class level, so
        # creating it here only costs anything on the first lookup.
        commit_number = Utils().get_wa_catalog_commit_number(
            '{}_{}'.format(language, resource)
        )
        # The URL is formatted on its own, never the whole document, so braces
        # elsewhere in the content cannot be mistaken for placeholders.
        return 'http://read.bibletranslationtools.org/u/WA-Catalog/{}_{}/{}/{}-{}.html#tn-chunk-{}-{}-{}'.format(
            language, resource, commit_number, book_number, book_slug.upper(),
            book_slug, chapter.zfill(3), verse.zfill(3)
        )

    return rc_link_pattern.sub(to_catalog_url, content)

def fix_links(self, content):
    """
    Rewrite the links found in a translationNotes page so they work in the generated HTML.

    The substitutions run in a fixed order: tA links first, then verse links
    (via fix_rc_links), then any RC link that is still left, and finally the
    rules that turn bare section names and bare URLs into markdown links.

    :param content: markdown text of one page
    :return: the text with RC links, section links and bare URLs rewritten
    """
    # convert tA RC links, e.g. rc://en/ta/man/translate/figs-euphemism =>
    # https://content.bibletranslationtools.org/WycliffeAssociates/en_tm/src/branch/master/jit/figs-euphemism/01.md
    # Only the language (group 1) and the article path (group 4) are used;
    # every link is pointed at the "jit" folder regardless of the manual named
    # in the link.
    content = re.sub(
        r'rc://([^/]+)/ta/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
        r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_tm/src/branch/master/jit/\4/01.md',
        content,
        flags=re.IGNORECASE
    )

    # convert RC links, e.g. rc://en/tn/help/rut/04/14 =>
    # http://read.bibletranslationtools.org/u/WA-Catalog/en_tn/ccdb2a707b/8-RUT.html#tn-chunk-rut-004-014
    content = self.fix_rc_links(content)

    # convert other RC links, e.g. rc://en/tw/dict/bible/kt/god =>
    # https://content.bibletranslationtools.org/WycliffeAssociates/en_tw/src/master/bible/kt/god.md
    # (the superseded git.door43.org replacement used to be passed in the same
    # call, which made re.sub fail on its argument list)
    content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
                     r'https://content.bibletranslationtools.org/WycliffeAssociates/\1_\2/src/master/\4.md', content,
                     flags=re.IGNORECASE)

    # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
    # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
    content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)

    # convert URLs to links if not already
    content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
                     content, flags=re.IGNORECASE)

    # URLs with just www at the start, no http
    content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                     content, flags=re.IGNORECASE)

    return content
68 changes: 68 additions & 0 deletions libraries/client/resources/books.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"gen": { "name": "Genesis", "num": 1, "anth": "ot" },
"exo": { "name": "Exodus", "num": 2, "anth": "ot" },
"lev": { "name": "Leviticus", "num": 3, "anth": "ot" },
"num": { "name": "Numbers", "num": 4, "anth": "ot" },
"deu": { "name": "Deuteronomy", "num": 5, "anth": "ot" },
"jos": { "name": "Joshua", "num": 6, "anth": "ot" },
"jdg": { "name": "Judges", "num": 7, "anth": "ot" },
"rut": { "name": "Ruth", "num": 8, "anth": "ot" },
"1sa": { "name": "1 Samuel", "num": 9, "anth": "ot" },
"2sa": { "name": "2 Samuel", "num": 10, "anth": "ot" },
"1ki": { "name": "1 Kings", "num": 11, "anth": "ot" },
"2ki": { "name": "2 Kings", "num": 12, "anth": "ot" },
"1ch": { "name": "1 Chronicles", "num": 13, "anth": "ot" },
"2ch": { "name": "2 Chronicles", "num": 14, "anth": "ot" },
"ezr": { "name": "Ezra", "num": 15, "anth": "ot" },
"neh": { "name": "Nehemiah", "num": 16, "anth": "ot" },
"est": { "name": "Esther", "num": 17, "anth": "ot" },
"job": { "name": "Job", "num": 18, "anth": "ot" },
"psa": { "name": "Psalms", "num": 19, "anth": "ot" },
"pro": { "name": "Proverbs", "num": 20, "anth": "ot" },
"ecc": { "name": "Ecclesiastes", "num": 21, "anth": "ot" },
"sng": { "name": "Song of Solomon", "num": 22, "anth": "ot" },
"isa": { "name": "Isaiah", "num": 23, "anth": "ot" },
"jer": { "name": "Jeremiah", "num": 24, "anth": "ot" },
"lam": { "name": "Lamentations", "num": 25, "anth": "ot" },
"ezk": { "name": "Ezekiel", "num": 26, "anth": "ot" },
"dan": { "name": "Daniel", "num": 27, "anth": "ot" },
"hos": { "name": "Hosea", "num": 28, "anth": "ot" },
"jol": { "name": "Joel", "num": 29, "anth": "ot" },
"amo": { "name": "Amos", "num": 30, "anth": "ot" },
"oba": { "name": "Obadiah", "num": 31, "anth": "ot" },
"jon": { "name": "Jonah", "num": 32, "anth": "ot" },
"mic": { "name": "Micah", "num": 33, "anth": "ot" },
"nam": { "name": "Nahum", "num": 34, "anth": "ot" },
"hab": { "name": "Habakkuk", "num": 35, "anth": "ot" },
"zep": { "name": "Zephaniah", "num": 36, "anth": "ot" },
"hag": { "name": "Haggai", "num": 37, "anth": "ot" },
"zec": { "name": "Zechariah", "num": 38, "anth": "ot" },
"mal": { "name": "Malachi", "num": 39, "anth": "ot" },
"mat": { "name": "Matthew", "num": 41, "anth": "nt" },
"mrk": { "name": "Mark", "num": 42, "anth": "nt" },
"luk": { "name": "Luke", "num": 43, "anth": "nt" },
"jhn": { "name": "John", "num": 44, "anth": "nt" },
"act": { "name": "Acts", "num": 45, "anth": "nt" },
"rom": { "name": "Romans", "num": 46, "anth": "nt" },
"1co": { "name": "1 Corinthians", "num": 47, "anth": "nt" },
"2co": { "name": "2 Corinthians", "num": 48, "anth": "nt" },
"gal": { "name": "Galatians", "num": 49, "anth": "nt" },
"eph": { "name": "Ephesians", "num": 50, "anth": "nt" },
"php": { "name": "Philippians", "num": 51, "anth": "nt" },
"col": { "name": "Colossians", "num": 52, "anth": "nt" },
"1th": { "name": "1 Thessalonians", "num": 53, "anth": "nt" },
"2th": { "name": "2 Thessalonians", "num": 54, "anth": "nt" },
"1ti": { "name": "1 Timothy", "num": 55, "anth": "nt" },
"2ti": { "name": "2 Timothy", "num": 56, "anth": "nt" },
"tit": { "name": "Titus", "num": 57, "anth": "nt" },
"phm": { "name": "Philemon", "num": 58, "anth": "nt" },
"heb": { "name": "Hebrews", "num": 59, "anth": "nt" },
"jas": { "name": "James", "num": 60, "anth": "nt" },
"1pe": { "name": "1 Peter", "num": 61, "anth": "nt" },
"2pe": { "name": "2 Peter", "num": 62, "anth": "nt" },
"1jn": { "name": "1 John", "num": 63, "anth": "nt" },
"2jn": { "name": "2 John", "num": 64, "anth": "nt" },
"3jn": { "name": "3 John", "num": 65, "anth": "nt" },
"jud": { "name": "Jude", "num": 66, "anth": "nt" },
"rev": { "name": "Revelation", "num": 67, "anth": "nt"}
}
21 changes: 12 additions & 9 deletions libraries/linters/tn_linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from libraries.door43_tools.bible_books import BOOK_NUMBERS
from libraries.general_tools import file_utils
from libraries.linters.markdown_linter import MarkdownLinter
from libraries.wa_catalog_utils.utils import Utils


class TnLinter(MarkdownLinter):
Expand Down Expand Up @@ -78,17 +79,19 @@ def find_invalid_links(self, folder, f, contents):
file_path_abs = os.path.abspath(file_path)
exists = os.path.exists(file_path_abs)
if not exists:
a = self.get_file_link(f, folder)
a = self.get_file_link(f)
msg = "{0}: contains invalid link: ({1})".format(a, link)
self.log.warnings.append(msg)
App.logger.debug(msg)

def get_file_link(self, f, folder=None):
    """
    Build the HTML anchor used in warnings to point at a published page.

    :param f: file name of the page; a trailing ``.md`` extension is dropped
    :param folder: unused; still accepted so callers that pass the folder
        keep working
    :return: an ``<a>`` element whose text is the file name without extension
    """
    catalog_utils = Utils()
    commit_number = catalog_utils.get_wa_catalog_commit_number(self.repo_name)
    # Remove only a trailing ".md". The dot is escaped and the match is
    # anchored, so names that merely contain "md" are left alone.
    f = re.sub(r'\.md$', '', f, flags=re.IGNORECASE)

    # NOTE(review): the commit number is looked up under the WA-Catalog user
    # while this URL points at WycliffeAssociates - confirm both share ids.
    url = "https://read.bibletranslationtools.org/u/WycliffeAssociates/{}/{}/#{}".format(
        self.repo_name, commit_number, f
    )
    a = '<a href="{0}">{1}</a>'.format(url, f)

    return a
23 changes: 14 additions & 9 deletions libraries/linters/tw_linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
from libraries.app.app import App
from libraries.general_tools import file_utils
from libraries.linters.markdown_linter import MarkdownLinter
from libraries.wa_catalog_utils.utils import Utils


class TwLinter(MarkdownLinter):

# match links of form '](link)'
link_marker_re = re.compile(r'\]\(([^\n()]+)\)', re.UNICODE)

language_commit_number_dict = {}

def lint(self):
"""
Checks for issues with translationWords
Expand Down Expand Up @@ -43,17 +46,19 @@ def find_invalid_links(self, folder, f, contents):
file_path_abs = os.path.abspath(file_path)
exists = os.path.exists(file_path_abs)
if not exists:
a = self.get_file_link(f, folder)
a = self.get_file_link(f)
msg = "{0}: contains invalid link: ({1})".format(a, link)
self.log.warnings.append(msg)
App.logger.debug(msg)

def get_file_link(self, f, folder=None):
    """
    Build the HTML anchor used in warnings to point at a published page.

    :param f: file name of the page; a trailing ``.md`` extension is dropped
    :param folder: unused; still accepted so callers that pass the folder
        keep working
    :return: an ``<a>`` element whose text is the file name without extension
    """
    catalog_utils = Utils()
    commit_number = catalog_utils.get_wa_catalog_commit_number(self.repo_name)
    # Remove only a trailing ".md". The dot is escaped and the match is
    # anchored, so names that merely contain "md" are left alone.
    f = re.sub(r'\.md$', '', f, flags=re.IGNORECASE)

    # NOTE(review): the commit number is looked up under the WA-Catalog user
    # while this URL points at WycliffeAssociates - confirm both share ids.
    url = "https://read.bibletranslationtools.org/u/WycliffeAssociates/{}/{}/#{}".format(
        self.repo_name, commit_number, f
    )
    a = '<a href="{0}">{1}</a>'.format(url, f)

    return a
Empty file.
21 changes: 21 additions & 0 deletions libraries/wa_catalog_utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import requests
import re


class Utils(object):
    """
    Looks up the commit number of a repository as published under WA-Catalog.

    The number is taken from the URL that the catalog page request ends up at
    and is remembered for the lifetime of the process.
    """

    catalog_url = 'http://read.bibletranslationtools.org/u/WA-Catalog/'
    # a ten-character id directly before the trailing slash of the final URL
    commit_number_pattern = r'http[A-Z0-9./:_\-]+/([A-Z0-9]{10})/$'

    # Cache shared by every instance (it is a class attribute), keyed by
    # repository name, so each repository is requested at most once.
    language_commit_number_dict = {}

    def _catalog_page_url(self, language_and_format):
        """Return the catalog page URL for a repository name such as ``en_tn``."""
        # catalog_url already ends with '/'; strip it so the result does not
        # contain a double slash
        return '{}/{}'.format(self.catalog_url.rstrip('/'), language_and_format)

    def get_wa_catalog_commit_number(self, language_and_format):
        """
        Return the commit number for a repository, fetching it on first use.

        :param language_and_format: repository name, e.g. ``en_tn``
        :return: the id captured by ``commit_number_pattern``; if the final
            URL does not match the pattern, that URL is returned unchanged
        """
        if language_and_format in self.language_commit_number_dict:
            return self.language_commit_number_dict[language_and_format]

        res = requests.get(self._catalog_page_url(language_and_format))
        # res.url is the URL the request finally resolved to
        # NOTE(review): a non-matching URL is cached and returned as-is, which
        # yields a broken link downstream - decide how callers should be told.
        commit_number = re.sub(self.commit_number_pattern, r'\1', res.url, flags=re.IGNORECASE)

        self.language_commit_number_dict[language_and_format] = commit_number
        return commit_number
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
requests==2.13.0
responses==0.5.1
boto3==1.4.4
botocore==1.12.253
beautifulsoup4==4.6.0
gogs_client==1.0.3
coveralls==1.1
Expand All @@ -11,6 +12,7 @@ future==0.16.0
pyparsing==2.1.10
usfm-tools==0.0.22
mock==2.0.0
watchdog==0.10.6
sphinx==1.5.2
sphinx-autobuild==0.6.0
sphinx-rtd-theme==0.1.9
Expand Down
Loading