From 0eaa3fa61a7c4dbb6f243d3bed973bde1081bf59 Mon Sep 17 00:00:00 2001 From: Rod S Date: Wed, 8 Sep 2021 16:54:13 -0700 Subject: [PATCH] Updated and ran black --- nototools/android_patches.py | 72 +++---- nototools/autofix_for_phase3.py | 36 ++-- nototools/check_familyname_and_styles.py | 14 +- nototools/cldr_data.py | 6 +- nototools/cmap_data.py | 32 +-- nototools/collect_cldr_punct.py | 4 +- nototools/coverage.py | 10 +- nototools/create_image.py | 8 +- nototools/dump_otl.py | 8 +- nototools/extract_ohchr_attributions.py | 6 +- nototools/fix_khmer_and_lao_coverage.py | 4 +- nototools/font_data.py | 2 +- nototools/generate_coverage_data.py | 6 +- nototools/generate_dingbats_html.py | 215 +++++++++++---------- nototools/generate_sample_from_exemplar.py | 12 +- nototools/generate_samples.py | 4 +- nototools/generate_website_2_data.py | 64 +++--- nototools/generate_website_data.py | 10 +- nototools/glyph_image/glyph_image_pair.py | 4 +- nototools/grab_download.py | 30 +-- nototools/lang_data.py | 36 ++-- nototools/lint_config.py | 14 +- nototools/mti_cmap_data.py | 4 +- nototools/noto_cmap_reqs.py | 60 +++--- nototools/noto_fonts.py | 53 ++--- nototools/noto_lint.py | 7 +- nototools/notoconfig.py | 6 +- nototools/subset.py | 3 +- nototools/test_vertical_extents.py | 15 +- nototools/ttc_utils.py | 44 ++--- nototools/unicode_data.py | 200 ++++++++++--------- nototools/update_udhr_samples.py | 171 ++++++++++------ requirements.txt | 2 +- 33 files changed, 606 insertions(+), 556 deletions(-) diff --git a/nototools/android_patches.py b/nototools/android_patches.py index d7dd4744..29a5530f 100755 --- a/nototools/android_patches.py +++ b/nototools/android_patches.py @@ -39,18 +39,18 @@ def patch_hyphen(srcdir, dstdir, copy_unchanged=True): """Add hyphen-minus glyphs to fonts that need it. - This is to enable languages to be hyphenated properly, - since Minikin's itemizer currently shows tofus if an - automatically hyphenated word is displated in a font - that has neither HYPHEN nor HYPHEN-MINUS. + This is to enable languages to be hyphenated properly, + since Minikin's itemizer currently shows tofus if an + automatically hyphenated word is displated in a font + that has neither HYPHEN nor HYPHEN-MINUS. - The list of font names comes from LANG_TO_SCRIPT in - tools/font/fontchain_lint.py. + The list of font names comes from LANG_TO_SCRIPT in + tools/font/fontchain_lint.py. - (In practice only U+002D HYPHEN-MINUS is added, since Noto LGC fonts - don't have U+2010 HYPHEN.) + (In practice only U+002D HYPHEN-MINUS is added, since Noto LGC fonts + don't have U+2010 HYPHEN.) - Bug: 21570828""" + Bug: 21570828""" # Names of fonts for which Android requires a hyphen. # This list omits Japanese and Korean. @@ -106,15 +106,15 @@ def patch_hyphen(srcdir, dstdir, copy_unchanged=True): def _remove_cjk_emoji(cjk_font_names, srcdir, dstdir): """ - Remove default emoji characters from CJK fonts. + Remove default emoji characters from CJK fonts. - Twenty-six characters that Unicode Technical Report #51 "Unicode - Emoji" defines as defaulting to emoji styles used to be displayed as - black and white ("text" style) before this. This patch removes those - characters from Noto CJK fonts, so they get displayed as color. + Twenty-six characters that Unicode Technical Report #51 "Unicode + Emoji" defines as defaulting to emoji styles used to be displayed as + black and white ("text" style) before this. This patch removes those + characters from Noto CJK fonts, so they get displayed as color. 
- (1c4749e20391a4) - """ + (1c4749e20391a4) + """ # Since subsetting changes tables in a way that would prevent a compact # .ttc file, this simply removes entries from the cmap table. This @@ -190,7 +190,7 @@ def _remove_from_cmap(infile, outfile, exclude=[]): def patch_cjk_ttc(ttc_srcfile, ttc_dstfile): """Take the source ttc, break it apart, remove the cjk emoji - from each file, then repackage them into a new ttc.""" + from each file, then repackage them into a new ttc.""" tmp_dir = tempfile.mkdtemp() font_names = ttc_utils.ttcfile_extract(ttc_srcfile, tmp_dir) @@ -205,7 +205,7 @@ def patch_cjk_ttc(ttc_srcfile, ttc_dstfile): def patch_cjk_ttcs(srcdir, dstdir): """Call patch_cjk_ttc for each ttc file in srcdir, writing the - result to dstdir using the same name.""" + result to dstdir using the same name.""" if not path.isdir(srcdir): print("%s is not a directory" % srcdir) @@ -349,30 +349,30 @@ def _format_set(char_set, name, filename): def subset_symbols(srcdir, dstdir): """Subset Noto Sans Symbols in a curated way. - Noto Sans Symbols is now subsetted in a curated way. Changes include: + Noto Sans Symbols is now subsetted in a curated way. Changes include: - * Currency symbols now included in Roboto are removed. + * Currency symbols now included in Roboto are removed. - * All combining marks for symbols (except for combining keycap) are - added, to combine with other symbols if needed. + * All combining marks for symbols (except for combining keycap) are + added, to combine with other symbols if needed. - * Characters in symbol blocks that are also covered by Noto CJK fonts - are added, for better harmony with the rest of the fonts in non-CJK - settings. The dentistry characters at U+23BE..23CC are not added, - since they appear to be Japan-only and full-width. + * Characters in symbol blocks that are also covered by Noto CJK fonts + are added, for better harmony with the rest of the fonts in non-CJK + settings. The dentistry characters at U+23BE..23CC are not added, + since they appear to be Japan-only and full-width. - * Characters that UTR #51 defines as default text are added, although - they may also exist in the color emoji font, to make sure they get - a default text style. + * Characters that UTR #51 defines as default text are added, although + they may also exist in the color emoji font, to make sure they get + a default text style. - * Characters that UTR #51 defines as default emoji are removed, to - make sure they don't block the fallback to the color emoji font. + * Characters that UTR #51 defines as default emoji are removed, to + make sure they don't block the fallback to the color emoji font. - * A few math symbols that are currently included in Roboto are added, - to prepare for potentially removing them from Roboto when they are - lower-quality in Roboto. + * A few math symbols that are currently included in Roboto are added, + to prepare for potentially removing them from Roboto when they are + lower-quality in Roboto. 
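A minimal standalone sketch of the cmap pruning that the emoji removal and symbol subsetting above both rely on (illustrative only, not part of this commit; `exclude` is a hypothetical set of codepoints to drop):

    from fontTools import ttLib

    def remove_from_cmap(infile, outfile, exclude=frozenset()):
        # Delete entries from each cmap subtable rather than running the
        # full subsetter, so the other tables stay untouched and compact
        # .ttc packing still works.
        font = ttLib.TTFont(infile)
        for table in font["cmap"].tables:
            table.cmap = {
                cp: name for cp, name in table.cmap.items() if cp not in exclude
            }
        font.save(outfile)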
- Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.""" + Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.""" # TODO see if we need to change this subset based on Noto Serif coverage # (so the serif fallback chain would support them) @@ -429,7 +429,7 @@ def patch_post_table(srcdir, dstdir): def patch_fonts(srcdir, dstdir): """Remove dstdir and repopulate with patched contents of srcdir (and - its 'cjk' subdirectory if it exists).""" + its 'cjk' subdirectory if it exists).""" srcdir = tool_utils.resolve_path(srcdir) dstdir = tool_utils.resolve_path(dstdir) diff --git a/nototools/autofix_for_phase3.py b/nototools/autofix_for_phase3.py index b4a485f6..a18b0ae9 100755 --- a/nototools/autofix_for_phase3.py +++ b/nototools/autofix_for_phase3.py @@ -58,8 +58,8 @@ def _check_version(version): def _check_version_info(version_info): """ensure version info looks reasonable, for example: - 'GOOG;noto-fonts:20170220:a8a215d2e889'. Raise an exception - if it does not.""" + 'GOOG;noto-fonts:20170220:a8a215d2e889'. Raise an exception + if it does not.""" m = _version_info_re.match(version_info) if not m: raise Exception( @@ -89,7 +89,7 @@ def _check_version_info(version_info): def _get_version_info(fonts): """If fonts are all from noto-fonts, use information from the current - state of the repo to build a version string. Otherwise return None.""" + state of the repo to build a version string. Otherwise return None.""" # add '/' to distinguish between noto-fonts/ and noto-fonts-alpha/ for repo_tag in ["[fonts]", "[fonts_alpha]", "[source]"]: @@ -137,7 +137,7 @@ def _check_autohint(script): def _expand_font_names(font_names, result=None): """font names can include names of files containing a list of names, open - those recursively and add to the set.""" + those recursively and add to the set.""" def strip_comment(line): ix = line.find("#") @@ -240,14 +240,14 @@ def _mm_to_version_str(mm): def get_new_version(font, relfont, nversion): """Return a new version number. font is the font we're updating, - relfont is the released version of this font if it exists, or None, - and nversion is the new version, 'keep', or None. If a new version is - passed to us, use it unless it is lower than either existing version, - in which case we raise an exception. If the version is 'keep' and - there is an existing release version, keep that. Otherwise bump the - release version, if it exists, or convert the old version to a 2.0 version - as appropriate. If the old version is a 2.0 version (e.g. Armenian was - was '2.30' in phase 2), that value is mapped to 2.40.""" + relfont is the released version of this font if it exists, or None, + and nversion is the new version, 'keep', or None. If a new version is + passed to us, use it unless it is lower than either existing version, + in which case we raise an exception. If the version is 'keep' and + there is an existing release version, keep that. Otherwise bump the + release version, if it exists, or convert the old version to a 2.0 version + as appropriate. If the old version is a 2.0 version (e.g. Armenian was + was '2.30' in phase 2), that value is mapped to 2.40.""" version = _extract_version(font) rversion = _extract_version(relfont) if relfont else None @@ -311,8 +311,8 @@ def _is_ui_metrics(f): def _autohint_code(f, script): """Return 'not-hinted' if we don't hint this, else return the ttfautohint - code, which might be None if ttfautohint doesn't support the script. 
- Note that LGC and MONO return None.""" + code, which might be None if ttfautohint doesn't support the script. + Note that LGC and MONO return None.""" if script == "no-script": return script @@ -364,7 +364,7 @@ def _alert(val_name, cur_val, new_val): def _alert_and_check(val_name, cur_val, expected_val, max_diff): """if max_diff >= 0, curval must be <= expected_val + maxdiff, - else curval must be >= expected_val + maxdiff""" + else curval must be >= expected_val + maxdiff""" _alert(val_name, cur_val, expected_val) if max_diff >= 0: err = cur_val > expected_val + max_diff @@ -376,9 +376,9 @@ def _alert_and_check(val_name, cur_val, expected_val, max_diff): def _get_release_fontpath(f, rel_dir): """If rel_dir is not None, look for a font under 'hinted' or 'unhinted' - depending on which of these is in the path f. If neither is in f, - look under rel_dir, and then rel_dir/unhinted. If a match is found, - return the path.""" + depending on which of these is in the path f. If neither is in f, + look under rel_dir, and then rel_dir/unhinted. If a match is found, + return the path.""" if rel_dir is None: return None diff --git a/nototools/check_familyname_and_styles.py b/nototools/check_familyname_and_styles.py index 7c601d99..b00a52e2 100644 --- a/nototools/check_familyname_and_styles.py +++ b/nototools/check_familyname_and_styles.py @@ -53,10 +53,10 @@ def _get_stylenames(styles): """Returns the list of style names for the encoded styles. These are the - (master-ish) style names encoded as weights / widths / italic, where each - field is one of the above key values. - If there's not an italic then the italic is omitted, if there's only - regular width and no italic then widths are omitted.""" + (master-ish) style names encoded as weights / widths / italic, where each + field is one of the above key values. + If there's not an italic then the italic is omitted, if there's only + regular width and no italic then widths are omitted.""" m = _extended_style_re.match(styles) if not m: raise ValueError('could not match style "%s"' % styles) @@ -99,15 +99,15 @@ def check_familyname(name, styles): def generate_family_filenames(name, styles): """Name is the family name portion of a Noto filename. Styles is the - encoding of the styles, see _get_stylenames.""" + encoding of the styles, see _get_stylenames.""" stylenames = _get_stylenames(styles) return [name + "-" + s + ".ttf" for s in stylenames] def _for_all_familynames(namefile, fn): """Call fn passing the family name and style descriptor for - all families in namefile. '#' is a comment to eol, blank lines are - ignored.""" + all families in namefile. '#' is a comment to eol, blank lines are + ignored.""" styles = None with open(namefile, "r") as f: for name in f: diff --git a/nototools/cldr_data.py b/nototools/cldr_data.py index 251bb361..b44711b1 100755 --- a/nototools/cldr_data.py +++ b/nototools/cldr_data.py @@ -668,7 +668,7 @@ def get_exemplar_and_source(loc_tag): def loc_tag_to_lsrv(loc_tag): """Convert a locale tag to a tuple of lang, script, region, and variant. - Supplies likely script if missing.""" + Supplies likely script if missing.""" m = LSRV_RE.match(loc_tag) if not m: if _DEBUG: @@ -732,8 +732,8 @@ def _init_lang_scr_to_lit_pops(): def get_lang_scr_to_lit_pops(): """Return a mapping from lang_scr to a list of tuples of region and - population sorted in descending order by population. - """ + population sorted in descending order by population. 
+ """ if not _lang_scr_to_lit_pops: _init_lang_scr_to_lit_pops() return _lang_scr_to_lit_pops diff --git a/nototools/cmap_data.py b/nototools/cmap_data.py index 71cfbb41..47989ffd 100644 --- a/nototools/cmap_data.py +++ b/nototools/cmap_data.py @@ -43,7 +43,7 @@ def _prettify(root, indent=""): """Pretty-print the root element if it has no text and children - by adding to the root text and each child's tail.""" + by adding to the root text and each child's tail.""" if not root.text and len(root): indent += " " sfx = "\n" + indent @@ -163,9 +163,9 @@ def create_metadata(program, args=None, date=datetime.date.today()): def create_table(header, rows): """Create a TableData object from the header and rows. Header - is a string, rows is a list of strings. In each, columns are - separated by ',' which cannot otherwise appear in the text. - Each row must have the same number of columns as the header does.""" + is a string, rows is a list of strings. In each, columns are + separated by ',' which cannot otherwise appear in the text. + Each row must have the same number of columns as the header does.""" header = [t.strip() for t in header.split(",")] RowData = collections.namedtuple("RowData", header) rowdatas = [] @@ -182,14 +182,14 @@ def create_table(header, rows): def create_table_from_map(script_to_cmap): """Create a table from a map from script to cmaps. Outputs - the script code, script name, count of code points, the - codepoint ranges in hex separated by space, the count of - excluded/fallback code points, and their ranges separated by - space. script_to_cmap can have values either of cmap or of - a tuple of cmap, xcmap; in the first case xcmap is assumed - None. xcmaps that are None are marked as having an xcount of -1. - This makes it possible to distinguish an empty xcmap from one - that doesn't exist.""" + the script code, script name, count of code points, the + codepoint ranges in hex separated by space, the count of + excluded/fallback code points, and their ranges separated by + space. script_to_cmap can have values either of cmap or of + a tuple of cmap, xcmap; in the first case xcmap is assumed + None. xcmaps that are None are marked as having an xcount of -1. + This makes it possible to distinguish an empty xcmap from one + that doesn't exist.""" table_header = "script,name,count,ranges,xcount,xranges".split(",") RowData = collections.namedtuple("RowData", table_header) @@ -224,7 +224,13 @@ def create_map_from_table(table): def _test(): meta = create_metadata("test", [("this", 5), ("that", 12.3)]) - table = create_table("foo,bar", ["1,5.3", "2,6.4",]) + table = create_table( + "foo,bar", + [ + "1,5.3", + "2,6.4", + ], + ) cmapdata = CmapData(meta, table) print(cmapdata) xml_text = write_cmap_data(cmapdata) diff --git a/nototools/collect_cldr_punct.py b/nototools/collect_cldr_punct.py index b0b64e6d..84497445 100755 --- a/nototools/collect_cldr_punct.py +++ b/nototools/collect_cldr_punct.py @@ -85,8 +85,8 @@ def _get_cldr_files(cldr_dirs): def _collect_script_to_punct(files): """Builds script to punct from provided cldr files. Builds 'LGC' - data from component scripts. Adds ASCII single and double quotes if - corresponding quotes are in the punct.""" + data from component scripts. 
Adds ASCII single and double quotes if + corresponding quotes are in the punct.""" script_to_punct = collections.defaultdict(set) curly_quotes_to_standard = [ diff --git a/nototools/coverage.py b/nototools/coverage.py index 271e5e36..4d6e47dd 100755 --- a/nototools/coverage.py +++ b/nototools/coverage.py @@ -32,12 +32,12 @@ def character_set(font): """Returns the character coverage of a font. - Args: - font: The input font's file name, or a TTFont. + Args: + font: The input font's file name, or a TTFont. - Returns: - A frozenset listing the characters supported in the font. - """ + Returns: + A frozenset listing the characters supported in the font. + """ if isinstance(font, str): font = ttLib.TTFont(font, fontNumber=0) cmap_table = font["cmap"] diff --git a/nototools/create_image.py b/nototools/create_image.py index 22180336..b966ce0d 100755 --- a/nototools/create_image.py +++ b/nototools/create_image.py @@ -55,8 +55,8 @@ def setup_fonts_conf(): """We first look for fonts.conf under the root nototools, and if we don't - find it we write it. The fontconfig cache also goes there. This of course - requires nototools to be writable.""" + find it we write it. The fontconfig cache also goes there. This of course + requires nototools to be writable.""" # We require notoconfig because we don't know where this code is located, # nor whether the font directories might be relative to it. @@ -129,7 +129,7 @@ def __repr__(self): def make_drawparams(**kwargs): """Create a DrawParams from kwargs, but converting weight, style, and stretch - from values from string to the pango value types if needed.""" + from values from string to the pango value types if needed.""" dp = DrawParams(**kwargs) dp.weight = _get_weight(kwargs.get("weight", "normal")) dp.style = _get_style(kwargs.get("style", "normal")) @@ -291,7 +291,7 @@ def create_png(text, output_path, **kwargs): def create_img(text, output_path, **kwargs): """Creates a PNG or SVG image based on the output_path extension, - from the given text""" + from the given text""" ext = (path.splitext(output_path)[1]).lower() if ext == ".png": create_png(text, output_path, **kwargs) diff --git a/nototools/dump_otl.py b/nototools/dump_otl.py index 152ad67f..895a7864 100755 --- a/nototools/dump_otl.py +++ b/nototools/dump_otl.py @@ -66,10 +66,14 @@ def printable_glyph_list(glyph_list, quote=False): def dump_lang_sys(script, lang, lang_sys): """Dumps a language system.""" - print("%s %s:" % (script, lang),) + print( + "%s %s:" % (script, lang), + ) assert lang_sys.LookupOrder is None if lang_sys.ReqFeatureIndex != 65535: - print("<%s>" % lang_sys.ReqFeatureIndex,) + print( + "<%s>" % lang_sys.ReqFeatureIndex, + ) print(lang_sys.FeatureIndex) diff --git a/nototools/extract_ohchr_attributions.py b/nototools/extract_ohchr_attributions.py index 9b4224c7..8e476400 100755 --- a/nototools/extract_ohchr_attributions.py +++ b/nototools/extract_ohchr_attributions.py @@ -181,9 +181,9 @@ def handle_data(self, data): def get_ohchr_status(ohchr_code, lang, attrib): """Decide the status based on the attribution text. - 'original' are in the public domain and need no attribution. - 'UN' are official UN translations and should be attributed as such. - 'other' are not official UN translations and should be attributed as such.""" + 'original' are in the public domain and need no attribution. + 'UN' are official UN translations and should be attributed as such. 
+ 'other' are not official UN translations and should be attributed as such.""" if ohchr_code in ["eng", "frn", "spn", "rus", "chn", "arz"]: return "original" diff --git a/nototools/fix_khmer_and_lao_coverage.py b/nototools/fix_khmer_and_lao_coverage.py index d85ef3c8..360a863e 100755 --- a/nototools/fix_khmer_and_lao_coverage.py +++ b/nototools/fix_khmer_and_lao_coverage.py @@ -30,7 +30,7 @@ def merge_chars_from_bank(orig_font, bank_font, target_font, chars): """Merge glyphs from a bank font to another font. - + Only the glyphs themselves, the horizontal metrics, and the cmaps will be copied. """ @@ -59,7 +59,7 @@ def merge_chars_from_bank(orig_font, bank_font, target_font, chars): def main(argv): """Fix all the fonts given in the command line. - + If they are Lao fonts, make sure they have ZWSP and dotted circle. If they are Khmer fonts, make sure they have ZWSP, joiners, and dotted circle.""" diff --git a/nototools/font_data.py b/nototools/font_data.py index 4e0bfb12..48b2478c 100755 --- a/nototools/font_data.py +++ b/nototools/font_data.py @@ -108,7 +108,7 @@ def get_os2_unicoderange_bitmap(font): def set_os2_unicoderange_bitmap(font, bitmap): """Set the UnicodeRange fields in the os/2 table from the 128 bits of the - long integer bitmap.""" + long integer bitmap.""" os2_table = font["OS/2"] mask = (1 << 32) - 1 os2_table.ulUnicodeRange1 = bitmap & mask diff --git a/nototools/generate_coverage_data.py b/nototools/generate_coverage_data.py index e4550b64..dad01d8a 100755 --- a/nototools/generate_coverage_data.py +++ b/nototools/generate_coverage_data.py @@ -56,7 +56,7 @@ def get_cps_from_cmap_data_file(data_file): def _create_metadata(**kwargs): """Create a MetaData object from the args. 'date' defaults to today's - date.""" + date.""" date = str(kwargs.pop("date", datetime.date.today())) program = str(kwargs.pop("program", "generate_coverage_data")) arglist = [(k, v) for k, v in sorted(kwargs.items()) if v is not None] @@ -77,7 +77,7 @@ def create(name, cps, paths=None, cmap_data=None): def _common_path_prefix(items): """Assuming items is an array of paths using path.sep as a path separator, - return a common path prefix of the items.""" + return a common path prefix of the items.""" prefix = None if len(items) <= 1: return "" @@ -142,7 +142,7 @@ def _build_cmap_elem(cmapdata): def _prettify(root, indent=""): """Pretty-print the root element if it has no text and children - by adding to the root text and each child's tail.""" + by adding to the root text and each child's tail.""" if not root.text and len(root): indent += " " sfx = "\n" + indent diff --git a/nototools/generate_dingbats_html.py b/nototools/generate_dingbats_html.py index 9ebfd072..b434b446 100755 --- a/nototools/generate_dingbats_html.py +++ b/nototools/generate_dingbats_html.py @@ -92,7 +92,7 @@ def _cleanlines(textfile): class CodeList(object): """An ordered list of code points (ints). These might map to other (PUA) code - points that the font knows how to display.""" + points that the font knows how to display.""" @staticmethod def fromfile(filename): @@ -500,19 +500,19 @@ def _generate_header(): def _load_fonts(data_list, data_dir, codelist_map): """data_list is a list of tuples of two to four items. The first item is - the key, the second is the name of the font file in data_dir. The - second can be None, otherwise it must exist. The third item, if - present, is the name to use for the font, otherwise it will be read - from the font, it must be present where there is no font. 
The - fourth item, if present, is the name of a codelist file, it must be present - where there is no font. If present and None, the the unicode cmap from the - font is used. otherwise the font file name is stripped of its extension and - try to find a file from which to create a codelist. - Multiple tuples can share the same key, these form one column and the order - of the files composing the tuple defines the order in which they are searched - for a glyph. - Returns a list of tuples of key, keyinfo, where keyinfo is - a list of tuples of filepath, name, codelist.""" + the key, the second is the name of the font file in data_dir. The + second can be None, otherwise it must exist. The third item, if + present, is the name to use for the font, otherwise it will be read + from the font, it must be present where there is no font. The + fourth item, if present, is the name of a codelist file, it must be present + where there is no font. If present and None, the the unicode cmap from the + font is used. otherwise the font file name is stripped of its extension and + try to find a file from which to create a codelist. + Multiple tuples can share the same key, these form one column and the order + of the files composing the tuple defines the order in which they are searched + for a glyph. + Returns a list of tuples of key, keyinfo, where keyinfo is + a list of tuples of filepath, name, codelist.""" def _load_font(data, codelist_map): if len(data) < 4: @@ -559,10 +559,10 @@ def _load_font(data, codelist_map): def _select_used_fonts(codelist, fonts, prefer_fonts, omit_fonts): """Return the fonts we want to use to display the codelist, in order. - If not None, prefer_fonts is a key or list of keys for fonts to order - at the end. If not None, omit_fonts is key or list of keys to omit - even if they would otherwise be used by default, however prefer_fonts - takes precedence over omit_fonts if the same key is in both.""" + If not None, prefer_fonts is a key or list of keys for fonts to order + at the end. If not None, omit_fonts is key or list of keys to omit + even if they would otherwise be used by default, however prefer_fonts + takes precedence over omit_fonts if the same key is in both.""" if prefer_fonts is not None: if isinstance(prefer_fonts, basestring): @@ -604,10 +604,10 @@ def _select_used_fonts(codelist, fonts, prefer_fonts, omit_fonts): def _load_targets(target_data, fonts, data_dir, codelist_map): """Target data is a list of tuples of target names, codelist files, an - optional preferred font key or list of keys, and an optional omitted font - key or list of keys. All files should be in data_dir. Codelist_map is a - cache in case the codelist file has already been read. Returns a list of - tuples of target name, codelist, and fontlist.""" + optional preferred font key or list of keys, and an optional omitted font + key or list of keys. All files should be in data_dir. Codelist_map is a + cache in case the codelist file has already been read. Returns a list of + tuples of target name, codelist, and fontlist.""" def _create_suffix(charlist): return charlist.decode("unicode-escape") @@ -644,8 +644,8 @@ def _select_font(fonts, font_id): def _create_codeset_from_expr(expr_list, flag_sets, data_dir, codelist_map): """Processes expr_list in order, building a codeset. - See _read_flag_data_from_file for information on expr_list. - This can modify flag_sets and codelist_map.""" + See _read_flag_data_from_file for information on expr_list. 
+ This can modify flag_sets and codelist_map.""" result = () for op, exp in expr_list: @@ -661,8 +661,9 @@ def _create_codeset_from_expr(expr_list, flag_sets, data_dir, codelist_map): if codes_or_spec is None: # we only know about '_emoji_' and '_math_' if exp == "_emoji_": - codes = unicode_data.get_emoji() - unicode_data.get_unicode_emoji_variants( - "proposed_extra" + codes = ( + unicode_data.get_emoji() + - unicode_data.get_unicode_emoji_variants("proposed_extra") ) elif exp == "_math_": codes = unicode_data.chars_with_property("Math") @@ -692,13 +693,13 @@ def _create_codeset_from_expr(expr_list, flag_sets, data_dir, codelist_map): def _load_flags(flag_data, data_dir, codelist_map): """Flag data is a list of tuples of defined sets or flags and expressions, see - _read_flag_data_from_file for more info. - This returns a map from set name to a tuple of (cp_set, bool) where True - means the flag is set for a cp if it is in the cp_set, and false means the - flag is set if the cp is not in the cp_set. + _read_flag_data_from_file for more info. + This returns a map from set name to a tuple of (cp_set, bool) where True + means the flag is set for a cp if it is in the cp_set, and false means the + flag is set if the cp is not in the cp_set. - This can fail since the code processing the flag_data does not actually try - to load the codelists.""" + This can fail since the code processing the flag_data does not actually try + to load the codelists.""" flag_sets = {} flag_map = {} @@ -753,53 +754,53 @@ def _read_font_data_from_file(filename): def _read_target_data_from_file(filename): """Target data uses # to indicate a comment to end of line. - Comments are stripped, then an empty or blank line is ignored. - - Targets are either tables or sequences, the default - is a table. - - Each line in a table target defines a tuple of four values: - target name, codelist, preferred font ids, and omitted font - ids. Each line in a sequence target defines a tuple of - four values: target name, codelist, suffix, and font id. - A line can also start with one of tree directives, - !define, !default, or !type. - - If a line starts with '!define ' we expect a key followed - by '=' and then one or more names separated by space. The - names are turned into a list, and entered into a dictionary - for the key. Once defined a key cannot be redefined. - - If a line starts with '!default ' we expect a key of either - 'prefer' or 'omit' optionally followed by '=' and a list of - names to prefer or omit; these will become the default - values until the next '!default ' directive. If there is - no '=' the value is reset. An omitted or empty prefer or - omit field will get the fallback, to explicitly request None - and override the fallback the field should contain 'None'. - - If a line starts with '!type ' we expect either 'table' or - 'sequence' to follow. This will become the type of the - following lines until the next '!type ' directive. - - Normally, a line consists of 2-4 fields separated by ';'. - The first two are a target name and a codelist spec. - - For table targets, the third is the preferred font ids - separated by space, previously !defined keys can be used - here instead of this list and the list defined for that key - will be used. The fourth is the omitted font ids separated - by space, they are treated similarly. If the preferred or - omit field is missing or empty and a default value for it - has been set, that value is used. 
- - For sequence targets, the third is a hex sequence indicating - the suffix string to apply after each codepoint, and the - fourth is the font id; these must both be present. - - This returns a list of the tuples of the type name followed - by the data for that type. - """ + Comments are stripped, then an empty or blank line is ignored. + + Targets are either tables or sequences, the default + is a table. + + Each line in a table target defines a tuple of four values: + target name, codelist, preferred font ids, and omitted font + ids. Each line in a sequence target defines a tuple of + four values: target name, codelist, suffix, and font id. + A line can also start with one of tree directives, + !define, !default, or !type. + + If a line starts with '!define ' we expect a key followed + by '=' and then one or more names separated by space. The + names are turned into a list, and entered into a dictionary + for the key. Once defined a key cannot be redefined. + + If a line starts with '!default ' we expect a key of either + 'prefer' or 'omit' optionally followed by '=' and a list of + names to prefer or omit; these will become the default + values until the next '!default ' directive. If there is + no '=' the value is reset. An omitted or empty prefer or + omit field will get the fallback, to explicitly request None + and override the fallback the field should contain 'None'. + + If a line starts with '!type ' we expect either 'table' or + 'sequence' to follow. This will become the type of the + following lines until the next '!type ' directive. + + Normally, a line consists of 2-4 fields separated by ';'. + The first two are a target name and a codelist spec. + + For table targets, the third is the preferred font ids + separated by space, previously !defined keys can be used + here instead of this list and the list defined for that key + will be used. The fourth is the omitted font ids separated + by space, they are treated similarly. If the preferred or + omit field is missing or empty and a default value for it + has been set, that value is used. + + For sequence targets, the third is a hex sequence indicating + the suffix string to apply after each codepoint, and the + fourth is the font id; these must both be present. + + This returns a list of the tuples of the type name followed + by the data for that type. + """ def add_index_list_or_defined(info, index, fallback, defines): """Extend or update info[index], possibly using defines""" @@ -876,9 +877,9 @@ def add_index_list_or_defined(info, index, fallback, defines): def _flagged_name(cp, flag_sets): """Prepend any flags to cp's unicode name, and return. Flag_sets - is a map from flag name to a tuple of cp set and boolean. - True means add flag if cp in set, False means add flag if it is - not in the set.""" + is a map from flag name to a tuple of cp set and boolean. + True means add flag if cp in set, False means add flag if it is + not in the set.""" try: name = unicode_data.name(cp) except: @@ -1039,35 +1040,35 @@ def _scan_expr(expr, def_names, used_names): def _read_flag_data_from_file(filename): """Read flag data file and generate a list of tuples for creating - the flag data map. If filename is None, returns an empty list. + the flag data map. If filename is None, returns an empty list. - Lines in the file either define a set used by a flag, or define - a flag. Define lines start with '!define ' followed by the name - of the set (_0-9A-Za-z), '=', and the definition (a codelist). 
+ Lines in the file either define a set used by a flag, or define + a flag. Define lines start with '!define ' followed by the name + of the set (_0-9A-Za-z), '=', and the definition (a codelist). - Definition lines have three fields separated by semicolon, - the name of the flag, 'in' or 'not in', and the definition - which can either be a codelist or an expression formed from - names of !defined sets joined with '&' (intersection), '|' - (union), or '-' (set difference). These operations are performed - in order left to right, there's no predecence. + Definition lines have three fields separated by semicolon, + the name of the flag, 'in' or 'not in', and the definition + which can either be a codelist or an expression formed from + names of !defined sets joined with '&' (intersection), '|' + (union), or '-' (set difference). These operations are performed + in order left to right, there's no predecence. - Predefined sets are '_emoji_', the unicode extended emoji values, - and '_math_', codepoints with the 'Math' property. + Predefined sets are '_emoji_', the unicode extended emoji values, + and '_math_', codepoints with the 'Math' property. - '#' is a comment to end-of line. Blank lines are ignored. + '#' is a comment to end-of line. Blank lines are ignored. - It's an error if there are multiple defined sets - with the same name or multiple flags with the same name. + It's an error if there are multiple defined sets + with the same name or multiple flags with the same name. - This returns a list of 3-tuples, one for each set used by a - flag, then one for each flag. Tuple for defined sets are - ('!define', set_name, set_spec), - there set_spec is None if the set_name is special, like '_emoji_'. - Tuples for flags are - (flag_name, True/False, [(op,expr)]), - where the list of op, expr tuples has the op character - ('|' '&', '-') and a define name or a codelist.""" + This returns a list of 3-tuples, one for each set used by a + flag, then one for each flag. Tuple for defined sets are + ('!define', set_name, set_spec), + there set_spec is None if the set_name is special, like '_emoji_'. 
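For example, a hypothetical input line "!define letters = 0041-005A 0061-007A" would yield ('!define', 'letters', '0041-005A 0061-007A').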
+ Tuples for flags are + (flag_name, True/False, [(op,expr)]), + where the list of op, expr tuples has the op character + ('|' '&', '-') and a define name or a codelist.""" if not filename: return [] @@ -1164,7 +1165,7 @@ def generate_html( relpath, ): """If not None, relpath is the relative path from the outfile to - the datadir, for use when generating font paths.""" + the datadir, for use when generating font paths.""" template = string.Template(_HTML_HEADER_TEMPLATE) styles = _generate_styles(fonts, relpath) @@ -1193,7 +1194,7 @@ def generate_html( def _build_cp_to_targets(targets): """Return a map from cp to a list of pairs of target group index and - name.""" + name.""" cp_to_targets = collections.defaultdict(list) # for i, (name, codelist, _) in enumerate(targets): for i, target in enumerate(targets): diff --git a/nototools/generate_sample_from_exemplar.py b/nototools/generate_sample_from_exemplar.py index 76e8edef..59d21dc5 100755 --- a/nototools/generate_sample_from_exemplar.py +++ b/nototools/generate_sample_from_exemplar.py @@ -252,8 +252,8 @@ def get_char_to_lang_map(loc_map): def char_lang_info(num_locales, char_to_lang_map): """Returns a tuple containing - - characters ordered by the number of langs that use them - - a list mapping number of shared langs to number of chars shared by those langs""" + - characters ordered by the number of langs that use them + - a list mapping number of shared langs to number of chars shared by those langs""" freq_list = [] hist = [0] * (num_locales + 1) @@ -297,7 +297,7 @@ def show_shared_langs_hist(hist): def get_upper_case_list(char_list): """Return the upper case versions where they differ. - If no char in the list is a lower case variant, the result is empty.""" + If no char in the list is a lower case variant, the result is empty.""" # keep in same order as input list. 
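    # e.g. (hypothetical input) ['a', '1', 'é'] -> ['A', 'É']; '1' is dropped
    # because it has no distinct upper-case form.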
upper_case_chars = [] for cp in char_list: @@ -325,8 +325,8 @@ def show_tiers(char_list, num_tiers, tier_size): def get_rare_char_info(char_to_lang_map, shared_lang_threshold): """Returns a tuple of: - - a set of 'rare_chars' (those used threshold langs or fewer), - - a mapping from each locale with rare chars to a set of its rare chars""" + - a set of 'rare_chars' (those used threshold langs or fewer), + - a mapping from each locale with rare chars to a set of its rare chars""" rare_chars = set() locs_with_rare_chars = collections.defaultdict(set) @@ -370,7 +370,7 @@ def select_rare_chars_for_loc( script, locs_with_rare_chars, shared_lang_threshold, char_to_lang_map ): """Return a list of 2-tuples of loc and selected rare chars, - ordered by decreasing literate population of the locale.""" + ordered by decreasing literate population of the locale.""" rarity_threshold_map = {} for lang_tag in locs_with_rare_chars: diff --git a/nototools/generate_samples.py b/nototools/generate_samples.py index b302dfc5..c30e9665 100755 --- a/nototools/generate_samples.py +++ b/nototools/generate_samples.py @@ -218,7 +218,7 @@ def _get_items(self, group, sort): def _strip_comments(definition_lines): """Not as straightforward as usual, because comments can be escaped - by backslash, and backslash can escape space.""" + by backslash, and backslash can escape space.""" out_lines = [] for line in definition_lines: pos = 0 @@ -412,7 +412,7 @@ def _find_matching_close_paren(text, pos): def _parse_pattern(value, groups): """Return a list of lists (groups) or tuples of lists - (parenthesized groups).""" + (parenthesized groups).""" pat_list = [] while value: m = _PAT_RE.match(value) diff --git a/nototools/generate_website_2_data.py b/nototools/generate_website_2_data.py index 3c55ca68..c68406e0 100755 --- a/nototools/generate_website_2_data.py +++ b/nototools/generate_website_2_data.py @@ -357,7 +357,7 @@ def get_named_lang_scrs(family_id_to_lang_scr_to_sample_key): def get_lang_scr_sort_order(lang_scrs): """Return a sort order for lang_scrs based on the english name, but - clustering related languages.""" + clustering related languages.""" def lang_key(lang_scr): name = lang_data.lang_script_to_names(lang_scr)[0] @@ -419,14 +419,14 @@ def lang_key(lang_scr): def get_charset_info(charset): """Returns an encoding of the charset as pairs of lengths of runs of chars - to skip and chars to include. Each length is written as length - 1 in - hex-- except when length == 1, which is written as the empty string-- and - separated from the next length by a comma. Thus successive commas - indicate a length of 1, a 1 indicates a length of 2, and so on. Since - the minimum representable length is 1, the base is -1 so that the first - run (a skip) of 1 can be output as a comma to then start the first - included character at 0 if need be. Only as many pairs of values as are - needed to encode the last run of included characters.""" + to skip and chars to include. Each length is written as length - 1 in + hex-- except when length == 1, which is written as the empty string-- and + separated from the next length by a comma. Thus successive commas + indicate a length of 1, a 1 indicates a length of 2, and so on. Since + the minimum representable length is 1, the base is -1 so that the first + run (a skip) of 1 can be output as a comma to then start the first + included character at 0 if need be. 
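For instance, under this scheme the set {U+0041..U+005A} encodes as "41,19" (a leading skip run of 0x42 written as 0x41, then an include run of 0x1A written as 0x19), while the single-character set {U+0000} encodes as just "," (two runs of length one, each written as the empty string).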
Only as many pairs of values as are + needed to encode the last run of included characters.""" ranges = coverage.convert_set_to_ranges(charset) prev = -1 @@ -451,8 +451,8 @@ def get_charset_info(charset): def get_sample_names_for_lang_scr_typ(lang_scr, typ): """Sample names are of the form 'lang-scr(-var)*typ.txt', return - names starting with lang-scr and ending with typ, stripping the extension, - and sorted with lang-scr_typ first and the rest in alphabetical order.""" + names starting with lang-scr and ending with typ, stripping the extension, + and sorted with lang-scr_typ first and the rest in alphabetical order.""" global _sample_names if not _sample_names: @@ -508,15 +508,15 @@ def sample_text_from_exemplar(exemplar): def get_sample_infos(lang_scr): """Return a list of tuples of: - - a short sample text string - - an attribution key, one of - UN: official UN translation, needs attribution - other: not an official UN translation, needs non-attribution - original: public domain translation, does not need attribution - none: we have no attribution info on this, does not need attribution - - source key. - The list is in order of priority: language texts, udhr samples, exemplars for - the language, sample chars for the script, exemplars for the script.""" + - a short sample text string + - an attribution key, one of + UN: official UN translation, needs attribution + other: not an official UN translation, needs non-attribution + original: public domain translation, does not need attribution + none: we have no attribution info on this, does not need attribution + - source key. + The list is in order of priority: language texts, udhr samples, exemplars for + the language, sample chars for the script, exemplars for the script.""" assert "-" in lang_scr @@ -555,9 +555,9 @@ def add_exemplars(lang_scr): def get_family_id_to_default_lang_scr(family_id_to_lang_scrs, families): """Return a mapping from family id to default lang tag, for families - that have multiple lang tags. This is based on likely subtags and - the script of the family (Latn for LGC). - """ + that have multiple lang tags. This is based on likely subtags and + the script of the family (Latn for LGC). + """ family_id_to_default_lang_scr = {} for family_id, lang_scrs in family_id_to_lang_scrs.items(): @@ -804,8 +804,8 @@ def get_readme_path(self, readme_key): def build_readmes(self): """Create README files for the zips. These are named README - and are put into /tmp/readmes/{fonts|cjk|emoji|all} before - being copied to zip files.""" + and are put into /tmp/readmes/{fonts|cjk|emoji|all} before + being copied to zip files.""" date_str = str(datetime.date.today()) names = self.get_readme_keys() @@ -1338,7 +1338,7 @@ def build_ttc_zips(self): def build_subset_zips(self): """Generate zipped versions of the CJK subset families for access via - the link on the cjk help page.""" + the link on the cjk help page.""" # The font family code skips the subset files, but we want them in the # package directory. Like the ttcs, we handle them separately. @@ -1641,13 +1641,13 @@ def use_in_web(font): def get_repo_info(skip_checks): """Looks at the three noto fonts repos (fonts, cjk, emoji) and - gets information about the current state of each. Returns - a mapping from 'fonts', 'cjk', and 'emoji' to the corresponding - info. + gets information about the current state of each. Returns + a mapping from 'fonts', 'cjk', and 'emoji' to the corresponding + info. 
- If skip_checks is not set, checks that the repos are in a good - state (at a known annotated tag and there are no pending commits), - otherwise an exception is raised.""" + If skip_checks is not set, checks that the repos are in a good + state (at a known annotated tag and there are no pending commits), + otherwise an exception is raised.""" repo_info = {} errors = [] diff --git a/nototools/generate_website_data.py b/nototools/generate_website_data.py index 86ae5c5d..9deb11d2 100755 --- a/nototools/generate_website_data.py +++ b/nototools/generate_website_data.py @@ -77,7 +77,7 @@ def convert_to_four_letter(script): - """"Converts a script name from a Noto font file name to ISO 15924 code.""" + """ "Converts a script name from a Noto font file name to ISO 15924 code.""" if script in ODD_SCRIPTS: script = ODD_SCRIPTS[script] elif script in unicode_data._script_long_name_to_code: @@ -747,8 +747,7 @@ def css_style(style_value): def fonts_are_basically_the_same(font1, font2): - """Returns true if the fonts are the same, except perhaps hint or platform. - """ + """Returns true if the fonts are the same, except perhaps hint or platform.""" return ( font1.family == font2.family and font1.script == font2.script @@ -952,7 +951,10 @@ def create_families_object(target_platform): font_list = [] for font in members: font_list.append( - {"style": css_style(font.style), "weight": css_weight(font.weight),} + { + "style": css_style(font.style), + "weight": css_weight(font.weight), + } ) if len(font_list) not in [1, 2, 4, 7]: print(key, font_list) diff --git a/nototools/glyph_image/glyph_image_pair.py b/nototools/glyph_image/glyph_image_pair.py index ee7fb3db..3ad08013 100755 --- a/nototools/glyph_image/glyph_image_pair.py +++ b/nototools/glyph_image/glyph_image_pair.py @@ -101,7 +101,9 @@ def _print_dbg(self): ) n = 0 for r in range(self.rows): - print("x" if self._covered_rows[r] else " ",) + print( + "x" if self._covered_rows[r] else " ", + ) print( " ".join( "%3d%1s" % (self.data[m], marks[self._marked[m]]) diff --git a/nototools/grab_download.py b/nototools/grab_download.py index c124ef54..1d8b5818 100755 --- a/nototools/grab_download.py +++ b/nototools/grab_download.py @@ -29,7 +29,7 @@ def grab_files(dst, files, src_vendor, name_date_re, extract_fn): """Get date from each filename in files, create a folder for it, under - dst/drops, then extract the files to it.""" + dst/drops, then extract the files to it.""" # The zip indicates that the corresponding drop is good and built from it. But # we might have messed up along the way, so: @@ -96,23 +96,23 @@ def matching_files_in_dir(src, namere): def invoke_main(src_vendor, name_date_re, extract_fn, default_params={}): """Grab the files. - src_vendor is a string, currently either Adobe or Monotype. - name_date_re is a regex, it should extract name, year, month, and day fields from the filename - extract_fn is a fn to to extract a file, it takes two args, a dest dir and the zip file name. + src_vendor is a string, currently either Adobe or Monotype. + name_date_re is a regex, it should extract name, year, month, and day fields from the filename + extract_fn is a fn to to extract a file, it takes two args, a dest dir and the zip file name. - default_params are default values for argparse. They can be: - - default_srcdir - - default_dstdir - - default_regex + default_params are default values for argparse. 
They can be: + - default_srcdir + - default_dstdir + - default_regex - The default regex and the name_date_re are superficially similar, but different in - purpose. The default_regex is used to select files under the src directory. The - name_date_re is used to extract the date from the file name. Both apply to the - file name, but the default_regex can be anything, while name_date_re needs to select - four groups, where the 2nd, 3rd, and 4th are the year, month, and day (yes this is - brittle, but all of this is). + The default regex and the name_date_re are superficially similar, but different in + purpose. The default_regex is used to select files under the src directory. The + name_date_re is used to extract the date from the file name. Both apply to the + file name, but the default_regex can be anything, while name_date_re needs to select + four groups, where the 2nd, 3rd, and 4th are the year, month, and day (yes this is + brittle, but all of this is). - The dest directory must exist and should have 'zips' and 'drops' subdirs.""" + The dest directory must exist and should have 'zips' and 'drops' subdirs.""" if not src_vendor: print("must define src_vendor") diff --git a/nototools/lang_data.py b/nototools/lang_data.py index e94214cb..9b37b728 100755 --- a/nototools/lang_data.py +++ b/nototools/lang_data.py @@ -45,9 +45,9 @@ def is_excluded_script(script_code): def script_includes(script_code): """Returns a set of script codes 'included' by the provided one. Intended to - deal with script codes like 'Jpan' used to describe writing systems that - use/require multiple scripts. The script code itself (and other subsets) - are also included in the result.""" + deal with script codes like 'Jpan' used to describe writing systems that + use/require multiple scripts. The script code itself (and other subsets) + are also included in the result.""" if script_code not in scripts(): raise ValueError("!not a script code: %s" % script_code) if script_code == "Hrkt": @@ -61,9 +61,9 @@ def script_includes(script_code): def _create_lang_data(): """Generates language data from CLDR plus extensions. - Returns a mapping from lang to a tuple of: - - a set of scripts used in some region - - a set of scripts not used in any region.""" + Returns a mapping from lang to a tuple of: + - a set of scripts used in some region + - a set of scripts not used in any region.""" all_lang_scripts = collections.defaultdict(set) used_lang_scripts = collections.defaultdict(set) @@ -151,13 +151,13 @@ def _remove_keys_from_dict(keys, some_dict): def _create_script_to_default_lang(lang_script_data): """Iterates over all the scripts in lang_script_data, and returns a map - from each script to the default language code, generally based on cldr - likely subtag data. This assigns 'en' to Latn by fiat (cldr defaults to - 'und'). Some other scripts (e.g. Dsrt) just get 'und'. + from each script to the default language code, generally based on cldr + likely subtag data. This assigns 'en' to Latn by fiat (cldr defaults to + 'und'). Some other scripts (e.g. Dsrt) just get 'und'. - This checks that the default lang for a script actually uses that script - in lang_script_data, when the default lang is not 'und'. - """ + This checks that the default lang for a script actually uses that script + in lang_script_data, when the default lang is not 'und'. 
+ """ script_to_default_lang = {} all_scripts = set() @@ -209,12 +209,12 @@ def _create_script_to_default_lang(lang_script_data): def _create_lang_script_to_names(lang_script_data): """Generate a map from lang-script to English (and possibly native) names. - Whether the script is included in the name depends on the number of used - and unused scripts. If there's one used script, that script is omitted. - Else if there's no used script and one unused script, that script is - omitted. Else the script is included. If there's no English name for - the lang_script, it is excluded. - """ + Whether the script is included in the name depends on the number of used + and unused scripts. If there's one used script, that script is omitted. + Else if there's no used script and one unused script, that script is + omitted. Else the script is included. If there's no English name for + the lang_script, it is excluded. + """ lang_to_names = {} for lang in lang_script_data: diff --git a/nototools/lint_config.py b/nototools/lint_config.py index 5ebad2c9..60e58215 100755 --- a/nototools/lint_config.py +++ b/nototools/lint_config.py @@ -90,8 +90,8 @@ def parse_int_ranges(range_string, is_hex=True, sep=" "): """Returns a set of ints from a string of numbers or ranges separated by sep. - A range is two values separated by hyphen with no intervening separator. - Result can be empty if range_string is empty.""" + A range is two values separated by hyphen with no intervening separator. + Result can be empty if range_string is empty.""" result = set() count = 0 base = 16 if is_hex else 10 @@ -123,7 +123,7 @@ def parse_int_ranges(range_string, is_hex=True, sep=" "): def write_int_ranges(int_values, in_hex=True, sep=" "): """From a set or list of ints, generate a string representation that can - be parsed by parse_int_ranges to return the original values (not order_preserving).""" + be parsed by parse_int_ranges to return the original values (not order_preserving).""" if not int_values: return "" @@ -261,8 +261,8 @@ def __init__( version=None, ): """Each arg is either a string, or a pair of a fn of two args returning bool, and an object. - When the arg is a pair, the target string is passed to the fn as the first arg and the - second element of the pair is passed as the second arg.""" + When the arg is a pair, the target string is passed to the fn as the first arg and the + second element of the pair is passed as the second arg.""" self.filename = filename self.name = name @@ -503,7 +503,7 @@ class TestSpec(object): # 4: optional '--' followed by comment to end of line def _process_data(data): """data is a hierarchy of tags. any level down to root can be enabled or disabled. this - builds a representation of the tag hierarchy from the text description.""" + builds a representation of the tag hierarchy from the text description.""" _data_line_re = re.compile( r"(\s*)([a-z0-9_]+)(?:\s+([^\s]+)\s+([^\s]+))?\s*(?:--\s*(.+)\s*)?$" ) @@ -711,7 +711,7 @@ def check(self, tag): def valuetype(self, tag): """If the tag filters values, return the type of the value ('gid' or 'cp') - being filtered, or None.""" + being filtered, or None.""" if tag in self.tag_filters: return self.tag_filters[tag][0] return None diff --git a/nototools/mti_cmap_data.py b/nototools/mti_cmap_data.py index b4f1ec94..9f274f8c 100755 --- a/nototools/mti_cmap_data.py +++ b/nototools/mti_cmap_data.py @@ -63,8 +63,8 @@ def get_script_to_cmaps(csvdata): # would be difficult at this point, so we just note the addition. 
"""This returns a map from 'script' to a tuple of cmap, xcmap where - xcmap is None if the header has not been checked, and contains the - marked codepoints otherwise (and might be empty).""" + xcmap is None if the header has not been checked, and contains the + marked codepoints otherwise (and might be empty).""" header = None data = None diff --git a/nototools/noto_cmap_reqs.py b/nototools/noto_cmap_reqs.py index 1bd7251c..4cbe4fd8 100755 --- a/nototools/noto_cmap_reqs.py +++ b/nototools/noto_cmap_reqs.py @@ -286,12 +286,12 @@ def finish(self): def _build_block_to_primary_script(): """Create a map from block to the primary script in a block. - If there are no characters defined in the block, it gets the script 'EXCL', - for 'exclude.' We don't define characters in this block. - If the most common script accounts for less than 80% of the defined characters - in the block, we use the primary from assigned_primaries, which might be None. - It's an error if there's no default primary and it's not listed in - assigned_primaries.""" + If there are no characters defined in the block, it gets the script 'EXCL', + for 'exclude.' We don't define characters in this block. + If the most common script accounts for less than 80% of the defined characters + in the block, we use the primary from assigned_primaries, which might be None. + It's an error if there's no default primary and it's not listed in + assigned_primaries.""" assigned_primaries = { "Basic Latin": "Latn", @@ -342,13 +342,19 @@ def _build_block_to_primary_script(): max_script = assigned_primaries[block] # print('assigning primary', block_info, '->', max_script) else: - print("ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), file=sys.stderr) + print( + "ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), + file=sys.stderr, + ) max_script = None elif max_script == "Zinh": if block in inherited_primaries: max_script = inherited_primaries[block] else: - print("ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), file=sys.stderr) + print( + "ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), + file=sys.stderr, + ) max_script = None block_to_script[block] = max_script return block_to_script @@ -367,7 +373,7 @@ def _primary_script_for_block(block): def _remove_unicode_assignments(cmap_ops): """The starting point is based on the script and script extensions data from - Unicode. Sometimes the assignments seem premature.""" + Unicode. Sometimes the assignments seem premature.""" cmap_ops.phase("remove unicode assignments") # Jelle says A8F1 makes no sense for Bengali since other characters needed @@ -378,7 +384,7 @@ def _remove_unicode_assignments(cmap_ops): def _unassign_inherited_and_common_with_extensions(cmap_ops): """Inherited and common characters with an extension that is neither of - these get removed from inherited/common scripts.""" + these get removed from inherited/common scripts.""" def remove_cps_with_extensions(script): for cp in cmap_ops.script_chars(script): @@ -395,7 +401,7 @@ def remove_cps_with_extensions(script): def _reassign_inherited(cmap_ops): """Assign all 'Zinh' chars to the primary script in their block. - Fail if there's no primary script. 'Zinh' is removed from script_to_chars.""" + Fail if there's no primary script. 
'Zinh' is removed from script_to_chars.""" cmap_ops.phase("reassign inherited") for cp in cmap_ops.script_chars("Zinh"): primary_script = _primary_script_for_block(unicode_data.block(cp)) @@ -412,7 +418,7 @@ def _reassign_inherited(cmap_ops): def _reassign_common(cmap_ops): """Move 'Zyyy' chars in blocks where 'Zyyy' is not primary to the primary - script.""" + script.""" cmap_ops.phase("reassign common") for cp in cmap_ops.script_chars("Zyyy"): primary_script = _primary_script_for_block(unicode_data.block(cp)) @@ -424,7 +430,7 @@ def _reassign_common(cmap_ops): def _unassign_latin(cmap_ops): """Remove some characters that extensions assigns to Latin but which we don't - need there.""" + need there.""" unwanted_latn = tool_utils.parse_int_ranges( """ 0951 0952 # devanagari marks @@ -795,8 +801,8 @@ def _reassign_symbols(cmap_ops): def _reassign_emoji(cmap_ops): """Reassign all emoji to emoji-color. Then assign all emoji with default - text presentation, plus those with variation selectors, plus select - others, to SYM2.""" + text presentation, plus those with variation selectors, plus select + others, to SYM2.""" cmap_ops.phase("reassign emoji") @@ -2823,7 +2829,7 @@ def _check_CJK(): def _assign_bidi_mirroring(cmap_ops): """Ensure that if a bidi mirroring char is in a font, its mirrored char - is too.""" + is too.""" cmap_ops.phase("bidi mirroring") script_to_chars = cmap_ops.create_script_to_chars() mirrored = unicode_data.mirrored_chars() @@ -2858,8 +2864,8 @@ def _unassign_lgc_from_symbols(cmap_ops): def _assign_programming_lang_symbols(cmap_ops): """Assign characters used in programming languages, which generally - should be in MONO and in some cases need to be compatible with math - in general.""" + should be in MONO and in some cases need to be compatible with math + in general.""" def add_mirrored(cps): mirrored_cps = set() @@ -2997,9 +3003,9 @@ def add_mirrored(cps): def _assign_symbols_from_groups(cmap_ops): """Use 'group data' to assign various symbols to Zmth, Zsym, SYM2, - MONO, MUSIC' based on character groups. This fine-tunes the block - assignments (some related symbols are scattered across blocks, - and symbols blocks are themselves mixed).""" + MONO, MUSIC' based on character groups. This fine-tunes the block + assignments (some related symbols are scattered across blocks, + and symbols blocks are themselves mixed).""" cmap_ops.phase("assign symbols from groups") with open("codepoint_groups.txt", "r") as f: @@ -3073,7 +3079,7 @@ def _assign_symbols_from_groups(cmap_ops): def _assign_mono(cmap_ops): """Monospace should be similar to LGC, with the addition of box drawing - and block elements. It should also include all CP437 codepoints.""" + and block elements. It should also include all CP437 codepoints.""" cmap_ops.phase("assign mono") lgc_chars = cmap_ops.script_chars("LGC") @@ -3106,9 +3112,9 @@ def _assign_sym2(cmap_ops): def _assign_math(cmap_ops): """No longer use STIX character set, we will just fallback for characters - not in math. However, we want much of math to work without fallback, for - instance we need character ranges for the combining marks, and want a serif - form of the ASCII, so we duplicate more than usual.""" + not in math. 
However, we want much of math to work without fallback, for + instance we need character ranges for the combining marks, and want a serif + form of the ASCII, so we duplicate more than usual.""" cmap_ops.phase("assign math") @@ -3197,7 +3203,7 @@ def _assign_math(cmap_ops): def _assign_dotted_circle(cmap_ops): """All scripts with combining marks should provide dotted circle (and provide - an appropriate rendering of the mark in combination with it).""" + an appropriate rendering of the mark in combination with it).""" cmap_ops.phase("assign dotted circle") def is_combining(cp): @@ -3254,7 +3260,7 @@ def _remove_unwanted(cmap_ops): def _assign_wanted(cmap_ops): """After we remove the characters we 'never want', add exceptions back in - to particular fonts.""" + to particular fonts.""" wanted_chars = { "LGC": "20bf feff", # Bitcoin (not in Unicode 9 data yet), BOM "MONO": "feff", # BOM diff --git a/nototools/noto_fonts.py b/nototools/noto_fonts.py index 0b35a97e..e859fed2 100644 --- a/nototools/noto_fonts.py +++ b/nototools/noto_fonts.py @@ -101,9 +101,9 @@ def preferred_script_name(script_key): def script_name_for_report(script_key): - return _script_key_to_report_name.get( - script_key, None - ) or preferred_script_name(script_key) + return _script_key_to_report_name.get(script_key, None) or preferred_script_name( + script_key + ) # NotoFont maps a font path to information we assume the font to have, based @@ -174,9 +174,7 @@ def script_name_for_report(script_key): _EXT_REGEX = re.compile(r".*\.(?:ttf|ttc|otf)$") -def get_noto_font( - filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3 -): +def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3): """Return a NotoFont if filepath points to a noto font, or None if we can't process the path.""" @@ -264,9 +262,7 @@ def get_noto_font( try: script = convert_to_four_letter(script) except ValueError: - sys.stderr.write( - "unknown script: %s for %s\n" % (script, filename) - ) + sys.stderr.write("unknown script: %s for %s\n" % (script, filename)) return None if not weight: @@ -276,10 +272,7 @@ def get_noto_font( is_UI_metrics = ( is_UI or style == "Emoji" - or ( - style == "Sans" - and script in noto_data.DEEMED_UI_SCRIPTS_SET - ) + or (style == "Sans" and script in noto_data.DEEMED_UI_SCRIPTS_SET) ) is_display = display == "Display" @@ -289,10 +282,7 @@ def get_noto_font( is_hinted = False else: hint_status = path.basename(filedir) - if ( - hint_status not in ["hinted", "unhinted"] - and "noto-source" not in filedir - ): + if hint_status not in ["hinted", "unhinted"] and "noto-source" not in filedir: # print >> sys.stderr, ( # 'unknown hint status for %s, defaulting to unhinted') % filedir pass @@ -484,9 +474,7 @@ def wws_family_id_to_name_parts(wws_id): # mono comes before CJK in the name if len(part_keys) > 2 and part_keys[2] == "mono": parts.append("Mono") - part_keys = part_keys[ - :2 - ] # trim mono so we don't try to add it again + part_keys = part_keys[:2] # trim mono so we don't try to add it again parts.append("CJK") if script == "hans": parts.append("sc") @@ -505,11 +493,7 @@ def wws_family_id_to_name_parts(wws_id): # Mono works as a script. The phase 2 'mono-mono' tag was special-cased # above so it won't get added a second time. 
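
An aside on the normalization applied just below: family-name parts cannot contain spaces, apostrophes, or hyphens, so the preferred script name is stripped of them. A standalone sketch (the helper name and sample script names are illustrative, not part of this patch):

    def _name_part_sketch(script_name):
        # mirror of the replace-chain in the line that follows
        return script_name.replace(" ", "").replace("'", "").replace("-", "")

    assert _name_part_sketch("Pau Cin Hau") == "PauCinHau"
    assert _name_part_sketch("N'Ko") == "NKo"
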
script_name = preferred_script_name(script.title()) - script_name = ( - script_name.replace(" ", "") - .replace("'", "") - .replace("-", "") - ) + script_name = script_name.replace(" ", "").replace("'", "").replace("-", "") parts.append(script_name) if len(part_keys) > 2: extra = part_keys[2] @@ -549,9 +533,7 @@ def get_noto_fonts(paths=NOTO_FONT_PATHS): """Scan paths for fonts, and create a NotoFont for each one, returning a list of these. 'paths' defaults to the standard noto font paths, using notoconfig.""" - font_dirs = list( - filter(None, [tool_utils.resolve_path(p) for p in paths]) - ) + font_dirs = list(filter(None, [tool_utils.resolve_path(p) for p in paths])) print("Getting fonts from: %s" % font_dirs) all_fonts = [] @@ -566,8 +548,7 @@ def get_noto_fonts(paths=NOTO_FONT_PATHS): font = get_noto_font(filepath) if not font: sys.stderr.write( - "bad font filename in %s: '%s'.\n" - % ((font_dir, filename)) + "bad font filename in %s: '%s'.\n" % ((font_dir, filename)) ) continue @@ -646,8 +627,7 @@ def get_families(fonts): rep_member = rep_member or rep_backup if not rep_member: raise ValueError( - "Family %s does not have a representative font." - % family_id + "Family %s does not have a representative font." % family_id ) name = get_font_family_name(rep_member.filepath) @@ -697,10 +677,7 @@ def _all_noto_font_key_to_names(paths): ix = fontname.find("-") familyname = fontname if ix == -1 else fontname[:ix] wws_key = noto_font_to_wws_family_id(font) - if ( - wws_key_to_family_name.get(wws_key, familyname) - != familyname - ): + if wws_key_to_family_name.get(wws_key, familyname) != familyname: print( "!!! mismatching font names for key %s: %s and %s" % (wws_key, wws_key_to_family_name[wws_key], familyname) @@ -717,9 +694,7 @@ def test(paths): print(key, val) name = "".join(wws_family_id_to_name_parts(key)) if name != val: - raise Exception( - "!!! generated name %s does not match" % name - ) + raise Exception("!!! generated name %s does not match" % name) def main(): diff --git a/nototools/noto_lint.py b/nototools/noto_lint.py index c8b6cff9..ce14bd72 100755 --- a/nototools/noto_lint.py +++ b/nototools/noto_lint.py @@ -642,7 +642,10 @@ def pluralize_errmsg(count, is_error=True): ).encode("UTF-8") ) else: - print("%s <%s> %s" % (err_type[0], test_name, message.encode("UTF-8").decode("UTF-8"))) + print( + "%s <%s> %s" + % (err_type[0], test_name, message.encode("UTF-8").decode("UTF-8")) + ) sys.stdout.flush() _script_key_to_font_name = { @@ -2385,7 +2388,7 @@ def check_accessiblity(cmap): def get_lint_spec(spec_file, extra_specs): """Return a LintSpec from spec_file supplemented with extra_specs. - If spec_file is None, only use extra_specs.""" + If spec_file is None, only use extra_specs.""" spec = None if spec_file != "None": diff --git a/nototools/notoconfig.py b/nototools/notoconfig.py index e6182bbe..bc409182 100755 --- a/nototools/notoconfig.py +++ b/nototools/notoconfig.py @@ -52,8 +52,8 @@ def _setup(): """The config consists of lines of the form = . - values will hold a mapping from the to value. - Blank lines and lines starting with '#' are ignored.""" + values will hold a mapping from the to value. + Blank lines and lines starting with '#' are ignored.""" global _config_path paths = [path.expanduser("~/.notoconfig"), "/usr/local/share/noto/config"] @@ -84,7 +84,7 @@ def _setup(): def noto_tools(default=""): """Local path to nototools git repo. 
If this is called, we require config - to be set up.""" + to be set up.""" result = _values.get("noto_tools", default) if result: return result diff --git a/nototools/subset.py b/nototools/subset.py index 36ab4467..5ec261af 100755 --- a/nototools/subset.py +++ b/nototools/subset.py @@ -81,8 +81,7 @@ def subset_font(source_file, target_file, include=None, exclude=None, options=No def main(argv): - """Subset the first argument to second, dropping unused parts of the font. - """ + """Subset the first argument to second, dropping unused parts of the font.""" subset_font(argv[1], argv[2]) diff --git a/nototools/test_vertical_extents.py b/nototools/test_vertical_extents.py index 01c41a75..cd44dbb0 100755 --- a/nototools/test_vertical_extents.py +++ b/nototools/test_vertical_extents.py @@ -44,8 +44,7 @@ def _regular_expression_from_set(character_set): - """Returns a regexp matching any sequence of a set of input characters. - """ + """Returns a regexp matching any sequence of a set of input characters.""" character_set -= set(range(0x00, 0x20)) # Remove ASCII controls literal_list = [] @@ -60,7 +59,7 @@ def _regular_expression_from_set(character_set): def test_rendering(data, font_file_name, min_allowed, max_allowed, language=None): """Test the rendering of the input data in a given font. - + The input data is first filtered for sequences supported in the font. """ font_characters = coverage.character_set(font_file_name) @@ -85,7 +84,7 @@ def test_rendering_from_file( file_handle, font_file_name, min_allowed, max_allowed, language=None ): """Test the rendering of the contents of a file for vertical extents. - + Supports both text files and XTB files. """ @@ -104,7 +103,7 @@ def test_rendering_from_file( else: # Assume text file, with all the data as one large string - #input_data = input_data.decode("UTF-8") + # input_data = input_data.decode("UTF-8") pass # Now, input_data is just a long string, with new lines as separators. @@ -143,9 +142,11 @@ def main(argv): """Test vertical extents to make sure they stay within specified bounds.""" if len(argv) <= 1: - print("test_vertical_extents.py font.ttf [language [ymin ymax]] < sample_text.[txt|xtb]") + print( + "test_vertical_extents.py font.ttf [language [ymin ymax]] < sample_text.[txt|xtb]" + ) return - + font_file_name = argv[1] if len(argv) > 2: diff --git a/nototools/ttc_utils.py b/nototools/ttc_utils.py index 49340c71..17ad5ef9 100755 --- a/nototools/ttc_utils.py +++ b/nototools/ttc_utils.py @@ -38,14 +38,14 @@ class TTCFile(object): """Holds some information from the sfnt headers in a .ttc file. - - fonts is a list of FontEntry objects, in order. It holds - the format ('ttf' or 'otf') and a list of indices into the - tables list. - - tables is the list of TableEntry objects, in order. Each holds - the table tag, offset, and length. Offsets are relative to - the very start of the data. There is one entry for each unique - table in the ttc. - """ + - fonts is a list of FontEntry objects, in order. It holds + the format ('ttf' or 'otf') and a list of indices into the + tables list. + - tables is the list of TableEntry objects, in order. Each holds + the table tag, offset, and length. Offsets are relative to + the very start of the data. There is one entry for each unique + table in the ttc. + """ def __init__(self, data=None): if data: @@ -109,10 +109,10 @@ def ttcfile_dump(ttcfile): def ttc_dump(ttc): """Dumps the ttc information. 
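
Taken together, the helpers in this file support an unpack/patch/repack flow; a minimal sketch (paths illustrative, relying on the default names-file naming described further below, which derives the same name from matching input and output basenames):

    from nototools import ttc_utils

    ttc_utils.ttcfile_extract_and_write_namesfile("fonts/NotoSerifCJK.ttc", "tmp")
    # ... modify the extracted .ttf/.otf files in tmp/ here ...
    ttc_utils.ttcfile_build_from_namesfile("out/NotoSerifCJK.ttc", "tmp")
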
- It provides a likely filename for each file, and lists the tables, providing - either the TableEntry data, or the table tag and index of the file that first - referenced the table. - """ + It provides a likely filename for each file, and lists the tables, providing + either the TableEntry data, or the table tag and index of the file that first + referenced the table. + """ names = ttc_filenames(ttc) table_map = {} @@ -143,11 +143,11 @@ def ttcfile_filenames(ttcfile): def ttc_filenames(ttc): """Returns likely filenames for each ttc file. - The filenames are based on the postscript name from the name table for each - font. When there is no information, the string '' is provided with - either 'ttf' or 'otf' in place of 'x' depending on the info in the sfnt - header. - """ + The filenames are based on the postscript name from the name table for each + font. When there is no information, the string '' is provided with + either 'ttf' or 'otf' in place of 'x' depending on the info in the sfnt + header. + """ names = [] for font in ttc.fonts: file_name = ttfont_filename(font) @@ -194,8 +194,8 @@ def ttc_namesfile_name(ttc_path): def ttcfile_build_from_namesfile(output_ttc_path, file_dir, namesfile_name=None): """Read names of files from namesfile and pass them to build_ttc to build - a .ttc file. The names file will default to one named after output_ttc and - located in file_dir.""" + a .ttc file. The names file will default to one named after output_ttc and + located in file_dir.""" output_ttc_path = tool_utils.resolve_path(output_ttc_path) if not namesfile_name: @@ -219,7 +219,7 @@ def ttcfile_build_from_namesfile(output_ttc_path, file_dir, namesfile_name=None) def ttcfile_extract(input_ttc_path, output_dir): """Extract .ttf/.otf fonts from a .ttc file, and return a list of the names of - the extracted fonts.""" + the extracted fonts.""" input_ttc_path = tool_utils.resolve_path(input_ttc_path) output_dir = tool_utils.ensure_dir_exists(output_dir) ttc = TTCollection(input_ttc_path) @@ -235,8 +235,8 @@ def ttcfile_extract_and_write_namesfile( input_ttc_path, output_dir, namesfile_name=None ): """Call ttcfile_extract and in addition write a file to output dir containing - the names of the extracted files. The name of the names file will default to - one based on the basename of the input path. It is written to output_dir.""" + the names of the extracted files. The name of the names file will default to + one based on the basename of the input path. It is written to output_dir.""" names = ttcfile_extract(input_ttc_path, output_dir) if not namesfile_name: namesfile_name = ttc_namesfile_name(input_ttc_path) diff --git a/nototools/unicode_data.py b/nototools/unicode_data.py index a6e10b11..d8a76650 100755 --- a/nototools/unicode_data.py +++ b/nototools/unicode_data.py @@ -96,9 +96,9 @@ def load_data(): """Loads the data files needed for the module. - Could be used by processes that care about controlling when the data is - loaded. Otherwise, data will be loaded the first time it's needed. - """ + Could be used by processes that care about controlling when the data is + loaded. Otherwise, data will be loaded the first time it's needed. + """ global _data_is_loaded if not _data_is_loaded: @@ -125,9 +125,9 @@ def load_data(): def name(char, *args): """Returns the name of a character. - Raises a ValueError exception if the character is undefined, unless an - extra argument is given, in which case it will return that argument. 
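
Concretely (a sketch of the documented fallback behavior; exact names come from the bundled Unicode data):

    from nototools import unicode_data

    assert unicode_data.name("A") == "LATIN CAPITAL LETTER A"
    # U+FFFE is a noncharacter with no name: the extra argument is returned
    # instead of raising ValueError.
    assert unicode_data.name("\uFFFE", "<no name>") == "<no name>"
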
- """ + Raises a ValueError exception if the character is undefined, unless an + extra argument is given, in which case it will return that argument. + """ if isinstance(char, int): char = unichr(char) # First try and get the name from unidata, which is faster and supports @@ -187,8 +187,8 @@ def combining(char): def to_upper(char): """Returns the upper case for a lower case character. - This is not full upper casing, but simply reflects the 1-1 - mapping in UnicodeData.txt.""" + This is not full upper casing, but simply reflects the 1-1 + mapping in UnicodeData.txt.""" load_data() cp = _char_to_int(char) try: @@ -200,8 +200,7 @@ def to_upper(char): def canonical_decomposition(char): - """Returns the canonical decomposition of a character as a Unicode string. - """ + """Returns the canonical decomposition of a character as a Unicode string.""" load_data() char = _char_to_int(char) try: @@ -223,8 +222,8 @@ def script(char): def script_extensions(char): """Returns the script extensions property of a character. - The return value is a frozenset of four-letter script codes. - """ + The return value is a frozenset of four-letter script codes. + """ load_data() char = _char_to_int(char) try: @@ -265,7 +264,7 @@ def block_names(): def age(char): """Returns the age property of a character as a string. - Returns None if the character is unassigned.""" + Returns None if the character is unassigned.""" load_data() char = _char_to_int(char) try: @@ -351,7 +350,7 @@ def indic_syllabic_category(char): def create_script_to_chars(): """Returns a mapping from script to defined characters, based on script and - extensions, for all scripts.""" + extensions, for all scripts.""" load_data() result = collections.defaultdict(set) for cp in _defined_characters: @@ -406,8 +405,7 @@ def _folded_script_name(script_name): def script_code(script_name): - """Returns the four-letter ISO 15924 code of a script from its long name. - """ + """Returns the four-letter ISO 15924 code of a script from its long name.""" load_data() folded_script_name = _folded_script_name(script_name) try: @@ -459,12 +457,12 @@ def all_scripts(): def open_unicode_data_file(data_file_name): """Opens a Unicode data file. - Args: - data_file_name: A string containing the filename of the data file. + Args: + data_file_name: A string containing the filename of the data file. - Returns: - A file handle to the data file. - """ + Returns: + A file handle to the data file. + """ filename = path.join(_DATA_DIR_PATH, data_file_name) return codecs.open(filename, "r", "utf-8") @@ -472,27 +470,27 @@ def open_unicode_data_file(data_file_name): def _parse_code_ranges(input_data): """Reads Unicode code ranges with properties from an input string. - Reads a Unicode data file already imported into a string. The format is - the typical Unicode data file format with either one character or a - range of characters separated by a semicolon with a property value (and - potentially comments after a number sign, that will be ignored). + Reads a Unicode data file already imported into a string. The format is + the typical Unicode data file format with either one character or a + range of characters separated by a semicolon with a property value (and + potentially comments after a number sign, that will be ignored). - Example source data file: - http://www.unicode.org/Public/UNIDATA/Scripts.txt + Example source data file: + http://www.unicode.org/Public/UNIDATA/Scripts.txt - Example data: - 0000..001F ; Common # Cc [32] .. 
- 0020 ; Common # Zs SPACE + Example data: + 0000..001F ; Common # Cc [32] .. + 0020 ; Common # Zs SPACE - Args: - input_data: An input string, containing the data. + Args: + input_data: An input string, containing the data. - Returns: - A list of tuples corresponding to the input data, with each tuple - containing the beginning of the range, the end of the range, and the - property value for the range. For example: - [(0, 31, 'Common'), (32, 32, 'Common')] - """ + Returns: + A list of tuples corresponding to the input data, with each tuple + containing the beginning of the range, the end of the range, and the + property value for the range. For example: + [(0, 31, 'Common'), (32, 32, 'Common')] + """ ranges = [] line_regex = re.compile( r"^" @@ -522,26 +520,26 @@ def _parse_code_ranges(input_data): def _parse_semicolon_separated_data(input_data): """Reads semicolon-separated Unicode data from an input string. - Reads a Unicode data file already imported into a string. The format is - the Unicode data file format with a list of values separated by - semicolons. The number of the values on different lines may be different - from another. + Reads a Unicode data file already imported into a string. The format is + the Unicode data file format with a list of values separated by + semicolons. The number of the values on different lines may be different + from another. - Example source data file: - http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt + Example source data file: + http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt - Example data: - sc; Cher ; Cherokee - sc; Copt ; Coptic ; Qaac + Example data: + sc; Cher ; Cherokee + sc; Copt ; Coptic ; Qaac - Args: - input_data: An input string, containing the data. + Args: + input_data: An input string, containing the data. - Returns: - A list of lists corresponding to the input data, with each individual - list containing the values as strings. For example: - [['sc', 'Cher', 'Cherokee'], ['sc', 'Copt', 'Coptic', 'Qaac']] - """ + Returns: + A list of lists corresponding to the input data, with each individual + list containing the values as strings. For example: + [['sc', 'Cher', 'Cherokee'], ['sc', 'Copt', 'Coptic', 'Qaac']] + """ all_data = [] for line in input_data.split("\n"): line = line.split("#", 1)[0].strip() # remove the comment @@ -786,7 +784,7 @@ def _load_emoji_data(): # use that data, but still have to parse the line. def _read_emoji_data(lines): """Parse lines of emoji data and return a map from sequence to tuples of - name, age, type.""" + name, age, type.""" line_re = re.compile( r"(?:([0-9A-F ]+)|([0-9A-F]+\.\.[0-9A-F]+)\s*);\s*(%s)\s*;\s*([^#]*)\s*#\s*E?(\d+\.\d+).*" % "|".join(EMOJI_SEQUENCE_TYPES) @@ -830,10 +828,10 @@ def _read_emoji_data_file(filename): def _read_emoji_test_data(data_string): """Parse the emoji-test.txt data. This has names of proposed emoji that are - not yet in the full Unicode data file. Returns a list of tuples of - sequence, group, subgroup, name. + not yet in the full Unicode data file. Returns a list of tuples of + sequence, group, subgroup, name. - The data is a string.""" + The data is a string.""" line_re = re.compile( r"([0-9a-fA-F ]+)\s*;\s*(%s)\s*#\s*(?:[^\s]+)\s+(.*)\s*" % "|".join(_EMOJI_QUAL_TYPES) @@ -997,9 +995,9 @@ def _read_emoji_test_data(data_string): def _get_order_patch(order_text, seq_to_name): """Create a mapping from a key sequence to a list of sequence, name tuples. 
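
Looping back to _parse_code_ranges above: its documented example, restated as executable code (a sketch; the function is module-private but importable):

    from nototools.unicode_data import _parse_code_ranges

    data = "0000..001F ; Common # Cc\n0020 ; Common # Zs SPACE\n"
    assert _parse_code_ranges(data) == [(0, 31, "Common"), (32, 32, "Common")]
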
- This will be used to insert additional sequences after the key sequence - in the order data. seq_to_name is a mapping from new sequence to name, - so the names don't have to be duplicated in the order data.""" + This will be used to insert additional sequences after the key sequence + in the order data. seq_to_name is a mapping from new sequence to name, + so the names don't have to be duplicated in the order data.""" patch_map = {} patch_key = None @@ -1041,11 +1039,11 @@ def _get_android_order_patch(): def _apply_order_patch(patch, group_list): """patch is a map from a key sequence to list of sequence, name pairs, and - group_list is an ordered list of sequence, group, subgroup, name tuples. - Iterate through the group list appending each item to a new list, and - after appending an item matching a key sequence, also append all of its - associated sequences in order using the same group and subgroup. - Return the new list. If there are any unused patches, raise an exception.""" + group_list is an ordered list of sequence, group, subgroup, name tuples. + Iterate through the group list appending each item to a new list, and + after appending an item matching a key sequence, also append all of its + associated sequences in order using the same group and subgroup. + Return the new list. If there are any unused patches, raise an exception.""" result = [] patched = set() @@ -1101,9 +1099,9 @@ def _load_emoji_group_data(): def get_emoji_group_data(seq): """Return group data for the canonical sequence seq, or None. - Group data is a tuple of index, group, subgroup, and name. The - index is a unique global sort index for the sequence among all - sequences in the group data.""" + Group data is a tuple of index, group, subgroup, and name. The + index is a unique global sort index for the sequence among all + sequences in the group data.""" _load_emoji_group_data() return _emoji_group_data.get(seq, None) @@ -1122,7 +1120,7 @@ def get_emoji_groups(): def get_emoji_subgroups(group): """Return the subgroups of this group, in order, or None - if the group is not recognized.""" + if the group is not recognized.""" _load_emoji_group_data() subgroups = [] subgroup = None @@ -1136,8 +1134,8 @@ def get_emoji_subgroups(group): def get_emoji_in_group(group, subgroup=None): """Return the sorted list of the emoji sequences in the group (limiting to - subgroup if subgroup is not None). Returns None if group does not - exist, and an empty list if subgroup does not exist in group.""" + subgroup if subgroup is not None). Returns None if group does not + exist, and an empty list if subgroup does not exist in group.""" _load_emoji_group_data() result = None for seq, (index, g, sg, _) in _emoji_group_data.items(): @@ -1153,9 +1151,9 @@ def get_emoji_in_group(group, subgroup=None): def get_sorted_emoji_sequences(seqs): """Seqs is a collection of canonical emoji sequences. Returns a list of - these sequences in the canonical emoji group order. Sequences that are not - canonical are placed at the end, in unicode code point order. - """ + these sequences in the canonical emoji group order. Sequences that are not + canonical are placed at the end, in unicode code point order. 
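
"Canonical" here means the form with emoji variation selectors where Unicode specifies them; see get_canonical_emoji_sequence and strip_emoji_vs further below. Concretely (a sketch; results depend on the bundled emoji data, and 0xFE0F is assumed to be EMOJI_VS):

    from nototools import unicode_data

    canonical = (0x2764, 0xFE0F)  # HEAVY BLACK HEART + emoji VS
    assert unicode_data.strip_emoji_vs(canonical) == (0x2764,)
    assert unicode_data.get_canonical_emoji_sequence((0x2764,)) == canonical
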
+ """ _load_emoji_group_data() return sorted(seqs, key=lambda s: (_emoji_group_data.get(s, 100000), s)) @@ -1247,9 +1245,9 @@ def add_data(data): def get_emoji_sequences(age=None, types=None): """Return the set of canonical emoji sequences, filtering to those <= age - if age is not None, and those with type in types (if not a string) or - type == types (if type is a string) if types is not None. By default - all sequences are returned, including those for single emoji.""" + if age is not None, and those with type in types (if not a string) or + type == types (if type is a string) if types is not None. By default + all sequences are returned, including those for single emoji.""" _load_emoji_sequence_data() result = _emoji_sequence_data.keys() @@ -1265,7 +1263,7 @@ def get_emoji_sequences(age=None, types=None): def get_emoji_sequence_data(seq): """Return a tuple of the name, age, and type for the (possibly non-canonical) - sequence, or None if not recognized as a sequence.""" + sequence, or None if not recognized as a sequence.""" _load_emoji_sequence_data() seq = get_canonical_emoji_sequence(seq) @@ -1276,15 +1274,15 @@ def get_emoji_sequence_data(seq): def get_emoji_sequence_name(seq): """Return the name of the (possibly non-canonical) sequence, or None if - not recognized as a sequence.""" + not recognized as a sequence.""" data = get_emoji_sequence_data(seq) return None if not data else data[0] def get_emoji_sequence_age(seq): """Return the age of the (possibly non-canonical) sequence, or None if - not recognized as a sequence. Proposed sequences have PROPOSED_EMOJI_AGE - as the age.""" + not recognized as a sequence. Proposed sequences have PROPOSED_EMOJI_AGE + as the age.""" # floats are a pain since the actual values are decimal. maybe use # strings to represent age. data = get_emoji_sequence_data(seq) @@ -1293,21 +1291,21 @@ def get_emoji_sequence_age(seq): def get_emoji_sequence_type(seq): """Return the type of the (possibly non-canonical) sequence, or None if - not recognized as a sequence. Types are in EMOJI_SEQUENCE_TYPES.""" + not recognized as a sequence. Types are in EMOJI_SEQUENCE_TYPES.""" data = get_emoji_sequence_data(seq) return None if not data else data[2] def is_canonical_emoji_sequence(seq): """Return true if this is a canonical emoji sequence (has 'vs' where Unicode - says it should), and is known.""" + says it should), and is known.""" _load_emoji_sequence_data() return seq in _emoji_sequence_data def get_canonical_emoji_sequence(seq): """Return the canonical version of this emoji sequence if the sequence is - known, or None.""" + known, or None.""" if is_canonical_emoji_sequence(seq): return seq seq = strip_emoji_vs(seq) @@ -1316,8 +1314,8 @@ def get_canonical_emoji_sequence(seq): def strip_emoji_vs(seq): """Return a version of this emoji sequence with emoji variation selectors - stripped. This is the 'non-canonical' version used by the color emoji font, - which doesn't care how the sequence is represented in text.""" + stripped. This is the 'non-canonical' version used by the color emoji font, + which doesn't care how the sequence is represented in text.""" if EMOJI_VS in seq: return tuple([cp for cp in seq if cp != EMOJI_VS]) return seq @@ -1425,8 +1423,8 @@ def is_emoji_modifier_base(cp): def _load_unicode_emoji_variants(): """Parse StandardizedVariants.txt and initialize a set of characters - that have a defined emoji variant presentation. 
All such characters - also have a text variant presentation so a single set works for both.""" + that have a defined emoji variant presentation. All such characters + also have a text variant presentation so a single set works for both.""" global _emoji_variants, _emoji_variants_proposed if _emoji_variants: @@ -1471,10 +1469,10 @@ def _load_unicode_emoji_variants(): def get_unicode_emoji_variants(include_proposed="proposed"): """Returns the emoji characters that have both emoji and text presentations. - If include_proposed is 'proposed', include the ones proposed in 2016/08. If - include_proposed is 'proposed_extra', also include the emoji Noto proposes - for text presentation treatment to align related characters. Else - include_proposed should resolve to boolean False.""" + If include_proposed is 'proposed', include the ones proposed in 2016/08. If + include_proposed is 'proposed_extra', also include the emoji Noto proposes + for text presentation treatment to align related characters. Else + include_proposed should resolve to boolean False.""" _load_unicode_emoji_variants() if not include_proposed: return _emoji_variants @@ -1492,12 +1490,12 @@ def get_unicode_emoji_variants(include_proposed="proposed"): def _load_variant_data(): """Parse StandardizedVariants.txt and initialize all non-emoji variant - data. The data is a mapping from codepoint to a list of tuples of: - - variant selector - - compatibility character (-1 if there is none) - - shaping context (bitmask, 1 2 4 8 for isolate initial medial final) - The compatibility character is for cjk mappings that map to 'the same' - glyph as another CJK character.""" + data. The data is a mapping from codepoint to a list of tuples of: + - variant selector + - compatibility character (-1 if there is none) + - shaping context (bitmask, 1 2 4 8 for isolate initial medial final) + The compatibility character is for cjk mappings that map to 'the same' + glyph as another CJK character.""" global _variant_data, _variant_data_cps if _variant_data: @@ -1559,7 +1557,7 @@ def variant_data_cps(): def _load_proposed_emoji_data(): """Parse proposed-emoji.txt if it exists to get cps/names of proposed emoji - (but not approved) for this version of Unicode.""" + (but not approved) for this version of Unicode.""" global _proposed_emoji_data, _proposed_emoji_data_cps if _proposed_emoji_data: @@ -1622,7 +1620,7 @@ def read_codeset(text): def codeset(cpname): """Return a set of the unicode codepoints in the code page named cpname, or - None.""" + None.""" filename = ("%s.txt" % cpname).upper() filepath = path.join( path.dirname(__file__), os.pardir, "third_party", "unicode", filename diff --git a/nototools/update_udhr_samples.py b/nototools/update_udhr_samples.py index 847a9a51..4800c63d 100755 --- a/nototools/update_udhr_samples.py +++ b/nototools/update_udhr_samples.py @@ -54,7 +54,7 @@ def fetch_udhr(fetch_dir): def update_udhr(udhr_dir, fetch_dir, in_repo): """Delete udhr_dir and rebuild with files extracted from udhr_xml.zip - in fetch_dir. Stage if udhr_dir is in the repo.""" + in fetch_dir. Stage if udhr_dir is in the repo.""" zippath = os.path.join(fetch_dir, UDHR_XML_ZIP_NAME) tool_utils.check_file_exists(zippath) @@ -80,14 +80,14 @@ def update_udhr(udhr_dir, fetch_dir, in_repo): def parse_index(src_dir): """Parse the index.xml file in src_dir and return a map from bcp to a set of - file codes, and a map from file code to ohchr code. + file codes, and a map from file code to ohchr code. - Skip files at stages 1 (missing) or 2 (not started). 
Stage 3 files have - article 1, which is what we want. Stage 4 and 5 are ok, the vast majority are - unreviewed (4). + Skip files at stages 1 (missing) or 2 (not started). Stage 3 files have + article 1, which is what we want. Stage 4 and 5 are ok, the vast majority are + unreviewed (4). - In some cases more than one file is mapped to the same bcp47 code, this gets - dealt with in fix_index.""" + In some cases more than one file is mapped to the same bcp47 code, this gets + dealt with in fix_index.""" tree = ET.parse(os.path.join(src_dir, "index.xml")) bcp_to_codes = collections.defaultdict(set) @@ -153,16 +153,47 @@ def parse_index(src_dir): # We also need to make sure we don't assign a new code that udhr already uses. BCP_FIXES = { - "acu-Latn": {"acu": "acu-Latn", "acu_1": None,}, - "ak": {"ak_asante": "ak", "ak_fante": "ak-fante",}, - "chr-Cher": {"chr_cased": "chr-Cher-cased", "chr_uppercase": "chr-Cher-monocase",}, - "cjk-Latn": {"cjk": "cjk-Latn", "cjk_AO": "cjk-Latn-AO",}, - "ht-Latn": {"hat_popular": "ht-Latn-popular", "hat_kreyol": "ht-Latn-kreyol",}, - "hus-Latn": {"hus": "hus-Latn", "hva": None, "hsf": None,}, - "kg-Latn": {"kng": "kg-Latn", "kng_AO": "kg-Latn-AO",}, - "kmb-Latn": {"009": None, "kmb": "kmb-Latn",}, - "la-Latn": {"lat": "la-Latn", "lat_1": None,}, - "ln-Latn": {"lin_tones": "ln-Latn", "lin": None,}, + "acu-Latn": { + "acu": "acu-Latn", + "acu_1": None, + }, + "ak": { + "ak_asante": "ak", + "ak_fante": "ak-fante", + }, + "chr-Cher": { + "chr_cased": "chr-Cher-cased", + "chr_uppercase": "chr-Cher-monocase", + }, + "cjk-Latn": { + "cjk": "cjk-Latn", + "cjk_AO": "cjk-Latn-AO", + }, + "ht-Latn": { + "hat_popular": "ht-Latn-popular", + "hat_kreyol": "ht-Latn-kreyol", + }, + "hus-Latn": { + "hus": "hus-Latn", + "hva": None, + "hsf": None, + }, + "kg-Latn": { + "kng": "kg-Latn", + "kng_AO": "kg-Latn-AO", + }, + "kmb-Latn": { + "009": None, + "kmb": "kmb-Latn", + }, + "la-Latn": { + "lat": "la-Latn", + "lat_1": None, + }, + "ln-Latn": { + "lin_tones": "ln-Latn", + "lin": None, + }, "ny-Latn": { "nya_chinyanja": "ny-Latn-chinyan", # max 8 chars in bcp47 "nya_chechewa": "ny-Latn-chechewa", @@ -176,23 +207,45 @@ def parse_index(src_dir): "oci_4": None, "prv": None, }, - "pov-Latn": {"008": None, "pov": "pov-Latn",}, - "ro-Latn": {"ron_2006": "ro-Latn", "ron_1993": None, "ron_1953": None,}, - "rom-Latn": {"rmn": "rom-Latn", "rmn_1": None,}, - "th-Thai": {"tha": "th-Thai", "tha2": None,}, - "ts-Latn": {"tso_MZ": "ts-Latn-MZ", "tso_ZW": "ts-Latn-ZW",}, - "umb-Latn": {"011": None, "umb": "umb-Latn",}, - "ur-Arab": {"urd": "ur-Arab", "urd_2": None,}, + "pov-Latn": { + "008": None, + "pov": "pov-Latn", + }, + "ro-Latn": { + "ron_2006": "ro-Latn", + "ron_1993": None, + "ron_1953": None, + }, + "rom-Latn": { + "rmn": "rom-Latn", + "rmn_1": None, + }, + "th-Thai": { + "tha": "th-Thai", + "tha2": None, + }, + "ts-Latn": { + "tso_MZ": "ts-Latn-MZ", + "tso_ZW": "ts-Latn-ZW", + }, + "umb-Latn": { + "011": None, + "umb": "umb-Latn", + }, + "ur-Arab": { + "urd": "ur-Arab", + "urd_2": None, + }, } def fix_index(bcp_to_codes): """Take a mapping from bcp47 to a set of file codes, and - select the mappings we want using a allowlist. We return - a mapping from one bcp47 code to one file code. + select the mappings we want using a allowlist. We return + a mapping from one bcp47 code to one file code. 
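
The fixes table above reads as: for a colliding bcp47 key, each file code is either dropped (None) or reassigned a bcp47 value of its own. A standalone sketch of applying one entry (a hypothetical loop, not the code in this patch):

    fixes = {"ak_asante": "ak", "ak_fante": "ak-fante"}  # BCP_FIXES["ak"]
    result = {}
    for code, new_bcp in fixes.items():
        if new_bcp is not None:      # None drops the file code entirely
            result[new_bcp] = code
    assert result == {"ak": "ak_asante", "ak-fante": "ak_fante"}
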
- We use this opportunity to validate the allowlist, and if there are - any errors, we fail once we're finished.""" + We use this opportunity to validate the allowlist, and if there are + any errors, we fail once we're finished.""" errors = [] used_fixes = set() result = {} @@ -274,7 +327,7 @@ def fix_index(bcp_to_codes): def add_likely_scripts(bcp_to_code): """Add script subtags where they are not present in the bcp code. If - we don't know the script""" + we don't know the script""" result = {} for bcp, code in bcp_to_code.items(): if code in CODE_TO_BCP: @@ -336,9 +389,9 @@ def filter_bcp_to_code(bcp_to_code): def add_default_lang_script(bcp_to_code_attrib_sample): """When we query this data, typically we have only language and - script. Some of the bcp codes have variants or regions as well, and in - particular sometimes none of these has just a language and script. - Select one of these to be used for that, and update the map.""" + script. Some of the bcp codes have variants or regions as well, and in + particular sometimes none of these has just a language and script. + Select one of these to be used for that, and update the map.""" errors = [] options = collections.defaultdict(set) @@ -397,22 +450,22 @@ def get_code_to_attrib(src_dir): def get_bcp_to_code_attrib_sample(src_dir, ohchr_dir): """Return a mapping from bcp47 to code (for debugging), attribution, and - sample. The process is: - 1) parse the index.xml file to determine a mapping from bcp47 to code. - the bcp47 code has at least lang and script, and perhaps region/variant. - Multiple codes might share the same bcp47 code. - 2) Use a allowlist to fix cases where a bcp47 code maps to multiple codes, - either by selecting one code, or assigning a separate bcp47 value - to other codes. - 3) Load samples for each bcp47 code using article 1 from the file - identified by the code. If there is no article 1, skip that bcp47 code. - 4) Do more checking on the samples to make sure they look legit and - in particular contain only the scripts we expect them to have based - on the script code in the bcp47 code. - 5) Add an attribution based on the code and the attributions file. - 6) Find cases where all the bcp47's sharing a lang and script have - regions and/or variants, and select one of these to assign to - the lang_script bcp47 code.""" + sample. The process is: + 1) parse the index.xml file to determine a mapping from bcp47 to code. + the bcp47 code has at least lang and script, and perhaps region/variant. + Multiple codes might share the same bcp47 code. + 2) Use a allowlist to fix cases where a bcp47 code maps to multiple codes, + either by selecting one code, or assigning a separate bcp47 value + to other codes. + 3) Load samples for each bcp47 code using article 1 from the file + identified by the code. If there is no article 1, skip that bcp47 code. + 4) Do more checking on the samples to make sure they look legit and + in particular contain only the scripts we expect them to have based + on the script code in the bcp47 code. + 5) Add an attribution based on the code and the attributions file. 
+ 6) Find cases where all the bcp47's sharing a lang and script have + regions and/or variants, and select one of these to assign to + the lang_script bcp47 code.""" bcp_to_codes, code_to_ohchr = parse_index(src_dir) bcp_to_code = fix_index(bcp_to_codes) @@ -445,7 +498,7 @@ def print_bcp_to_code_attrib_sample(bcp_to_code_attrib_sample): def extract_para(src_path): """Extract the text of article 1 from the sample, or None if we can't find - it.""" + it.""" tree = ET.parse(src_path) root = tree.getroot() ns = {"udhr": "http://www.unhchr.ch/udhr"} @@ -583,9 +636,9 @@ def info_data(bad_scripts, script_data): def check_bcp_to_sample(bcp_to_sample): """For each bcp/sample pair, check the sample script histogram. If - anything looks funny (mismatching scripts, bcp script doesn't match - histogram), delete the mapping, and if there are any rejects, at the - end report them.""" + anything looks funny (mismatching scripts, bcp script doesn't match + histogram), delete the mapping, and if there are any rejects, at the + end report them.""" errors = [] for bcp in sorted(bcp_to_sample): @@ -603,9 +656,9 @@ def check_bcp_to_sample(bcp_to_sample): def update_samples(sample_dir, udhr_dir, bcp_to_code_attrib_sample, in_repo, no_stage): """Create samples in sample_dir based on the bcp to c_a_s map. Stage - if sample_dir is in the repo. If sample_dir is in the repo, don't - overwrite samples whose most recent log entry does not start with - 'Updated by tool'.""" + if sample_dir is in the repo. If sample_dir is in the repo, don't + overwrite samples whose most recent log entry does not start with + 'Updated by tool'.""" tool_utils.check_dir_exists(udhr_dir) @@ -669,7 +722,7 @@ def update_samples(sample_dir, udhr_dir, bcp_to_code_attrib_sample, in_repo, no_ def get_scripts(text): """Return the set of scripts in this text. Excludes - some common chars.""" + some common chars.""" # ignore these chars, we assume they are ok in any script exclusions = {0x00, 0x0A, 0x0D, 0x20, 0xA0, 0xFEFF} zyyy_chars = set() @@ -688,7 +741,7 @@ def get_scripts(text): def get_script_histogram(utext): """Return a map from script to character count + chars, excluding some common - whitespace, and inherited characters. utext is a unicode string.""" + whitespace, and inherited characters. utext is a unicode string.""" exclusions = {0x00, 0x0A, 0x0D, 0x20, 0xA0, 0xFEFF} result = {} for cp in utext: @@ -768,8 +821,8 @@ def test_sample_scripts(sample_dir): def compare_samples(base_dir, trg_dir, trg_to_base_name=lambda x: x, opts=None): """Report on differences between samples in base and target directories. - The trg_to_base_name fn takes a target file name and returns the source - file name to use in the comparisons.""" + The trg_to_base_name fn takes a target file name and returns the source + file name to use in the comparisons.""" if not os.path.isdir(base_dir): print("Original sample dir '%s' does not exist" % base_dir) diff --git a/requirements.txt b/requirements.txt index bd10107a..cfe185e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ afdko==3.4.0 appdirs==1.4.4 attrs==19.3.0 -black==19.10b0 +black==21.8b0 booleanOperations==0.9.0 Brotli==1.0.7 click==7.1.2
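
With black now pinned at 21.8b0 in requirements.txt, the reformatting in this patch should be reproducible locally; the usual flow is (commands illustrative):

    pip install black==21.8b0
    black nototools            # rewrite files in place
    black --check nototools    # verify nothing is left to reformat
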