diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..615b7f9 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "annotations/from-icann"] + path = annotations/from-icann + url = https://github.com/icann/icann-annotations.git +[submodule "annotations/wes-hardaker"] + path = annotations/wes-hardaker + url = https://github.com/hardaker/dns-rfc-annotations.git +[submodule "annotations/isc"] + path = annotations/isc + url = https://gitlab.isc.org/isc-projects/rfc-annotations.git diff --git a/Makefile b/Makefile index 2f0c39b..49c26b5 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,6 @@ CURRENT_CONTAINER=$(shell docker ps -aq --filter name=${CONTAINERNAME}) CURRENT_IMAGE=$(shell docker image list --filter reference=${IMAGENAME} -q) all: folders - python3 program/pull_updates.py RFC_INDEX="YES" python3 -u program/main.py generated-html local-config raw-originals raw-originals/drafts annotations/_generated: @@ -14,7 +13,6 @@ generated-html local-config raw-originals raw-originals/drafts annotations/_gene folders: generated-html local-config raw-originals raw-originals/drafts annotations/_generated annotations: folders - python3 program/pull_updates.py RFC_FETCH_FILES="NO" python3 -u program/main.py test: tests folders @@ -54,4 +52,4 @@ docker-remove: clean: docker-remove rm -rf generated-html/rfc*.html && rm -rf raw-originals && rm -rf annotations/_generated && rm -rf .pytest_cache \ - rm -rf program/__pycache__ \ No newline at end of file + rm -rf program/__pycache__ diff --git a/README.md b/README.md index 2593fc7..dabc751 100644 --- a/README.md +++ b/README.md @@ -105,9 +105,10 @@ Annotations that are automatically generated by the tool are put in `annotations it is unlikely that you want to touch that folder. You can add local annotations by adding subdirectories to the `annotations/` directory. -The tool will collect annotations from the sources listed in the `annotation-sources.txt` configuration file. -The default file comes with sources of DNS-related annotations created by ICANN (such as errata and updates discussion) -as well as outside sources that have told ICANN they want to be included. +The directory already contains some directories initialized as git submodules. +Be sure that the submodules are initialized with `git submodule update --init`. +One submodule in the `annotations/from-icann` directory contains DNS-related annotations created by ICANN (such as errata and updates discussion). +The other submodule directories come from outside sources that have told ICANN they want to be included. See the README in the `annotations/` folder for a description of the format of the annotation files. diff --git a/annotations/from-icann b/annotations/from-icann new file mode 160000 index 0000000..f1eb594 --- /dev/null +++ b/annotations/from-icann @@ -0,0 +1 @@ +Subproject commit f1eb59443b9ae968cf2924233a6abb4db675d61a diff --git a/annotations/isc b/annotations/isc new file mode 160000 index 0000000..f47d672 --- /dev/null +++ b/annotations/isc @@ -0,0 +1 @@ +Subproject commit f47d672e5d5678a2cc84e4039e3c62558bed278b diff --git a/annotations/wes-hardaker b/annotations/wes-hardaker new file mode 160000 index 0000000..75da353 --- /dev/null +++ b/annotations/wes-hardaker @@ -0,0 +1 @@ +Subproject commit 75da35376372a88dbad196ee51c1590838c2974c diff --git a/default-config/annotation-sources.txt b/default-config/annotation-sources.txt deleted file mode 100644 index d528502..0000000 --- a/default-config/annotation-sources.txt +++ /dev/null @@ -1,15 +0,0 @@ -# This is an example of how configurations will work -# Lines starting with a "#" are ignored. Blank lines are ignored. -# Other lines are formatted as - -# The following repo is supplied by ICANN, and will presumably -# be updated by ICANN over time. It contains two directories, -# one for errata put in place in the RFCs, the other for -# explanations of how RFCs update other RFCs -# It is meant only as starting point for the RFC annotations -# tool project, not as the complete list of annotations one -# would ever want. - -https://github.com/icann/icann-annotations.git from-icann -https://github.com/hardaker/dns-rfc-annotations.git wes-hardaker -https://gitlab.isc.org/isc-projects/rfc-annotations.git isc diff --git a/program/drafts.py b/program/drafts.py index 5c46790..e50f01f 100644 --- a/program/drafts.py +++ b/program/drafts.py @@ -2,10 +2,9 @@ import os import subprocess from typing import Optional -from urllib.request import urlopen from xml.dom.minidom import Document, parseString, Element -import util # filtered_files, debug, info, error +import util # filtered_files, debug, info, error, urlopen ''' Read and process Internet Drafts for RFC annotations tools ''' @@ -77,7 +76,7 @@ def get_draft_status(directory: str, url: str = "https://www.ietf.org/id/all_id. if document is None: util.info(f"\nFetching draft status from source of truth {url}... ", end='') try: - text_content = urlopen(url).read().decode('utf-8') + text_content = util.urlopen(url).read().decode('utf-8') util.info(f"Retrieved {len(text_content)} chars of data. Parsing and converting...", end='') document = {} for entry in text_content.split("\n"): diff --git a/program/errata.py b/program/errata.py index 4557b05..4b03a0f 100644 --- a/program/errata.py +++ b/program/errata.py @@ -1,9 +1,8 @@ import json import os from typing import Optional -from urllib.request import urlopen -import util # correct_path, create_checksum, config_directories, debug, info, error +import util # correct_path, create_checksum, config_directories, debug, info, error, urlopen ''' Create errata for RFC annotations tools ''' @@ -22,7 +21,7 @@ def read_errata(path: str = ".", url: str = "https://www.rfc-editor.org/errata.j if document is None: util.info(f"\nFetching errata from source of truth {url}... ", end='') try: - json_content = urlopen(url).read() + json_content = util.urlopen(url).read() util.info("Done") if type(json_content) is bytes: util.debug(f"Retrieved {len(json_content)} bytes of data. Parsing... ", end='') diff --git a/program/pull_updates.py b/program/pull_updates.py deleted file mode 100755 index edf3dca..0000000 --- a/program/pull_updates.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python3 -import os.path -import subprocess -import urllib.request -from pathlib import Path - -''' Program to get updates to annotations from remote locations ''' - - -def handle_git(this_url, target_dir): - # Do a git clone, if necessary; do a git pull if not - # If the directory does have a .git subdirectory, do a "git clone" - if not (target_dir / ".git").exists(): - try: - print(f"Cloning {this_url} into {str(target_dir)} for the first time.") - subprocess.run(f"git clone {this_url} {str(target_dir)}", shell=True, check=True) - except: - print(f"** Running the initial 'git clone {this_url} {str(target_dir)}' failed. Skipping.") - return - # Pull the contents - else: - try: - process = subprocess.run(f"cd {str(target_dir)} && git pull", shell=True, check=True, capture_output=True, - text=True) - if "Already up to date" in process.stdout: - print(f"Already up to date: {this_url}") - else: - print(f"Got updates for {this_url}") - except: - print(f"** Running 'cd {str(target_dir)} && git pull' failed. Skipping.") - - -# noinspection PyBroadException -def process_config_content(config): - # This is defined as a function so it can be called recursively - # Go through line-by-line - line_count = 0 - for this_line in config.splitlines(): - line_count += 1 - # Sanity checks for the config file - if this_line.startswith("#"): - continue - if this_line.strip() == "": - continue - if "\t" not in this_line: - exit(f"Line {line_count} in {str(config_location)} does not have a character. Exiting.") - this_url = None - this_dir = None - try: - this_url, this_dir = this_line.split("\t", maxsplit=1) - except: - exit(f"Line {line_count} in {str(config_location)} has more than one character. Exiting.") - # Be sure it looks like a URL or at least has a : for SSH-like descriptors - if ":" not in this_url: - exit(f"There is no ';' in the first field in line {line_count}: \"{this_url}\". Exiting.") - # Create the target directory if it is not already there - target_dir = Path(f"annotations/{this_dir}") - if not target_dir.exists(): - try: - target_dir.mkdir() - print(f"Made new directory {target_dir} for line {line_count} in {str(config_location)}") - except: - exit(f"Could not create {str(target_dir)} for line {line_count} in {str(config_location)}. Exiting.") - # It might be an SSH-style git account, so there will be no "//" - if "//" not in this_url: - if not this_url.endswith(".git"): - print(f"** There is an git-as-SSH URL in line {line_count}, \"{this_url}\", but it does not end " - "with \".git\". Skipping.") - continue - handle_git(this_url, target_dir) - continue - else: - (this_scheme, _) = this_line.split(":", maxsplit=1) - if this_scheme == "git": - print(f"** Line {line_count} has a full git URL of \"{this_url}\", but this program doesn't handle " - "\"git://\" URLs.") - print("** Instead, use an SSH-style URL for git, such as git@github.com:some_person/some_repo.git") - print("** or an https URL such as https://github.com/some_person/some_repo.git. Skipping.") - continue - elif this_scheme == "rsync": - if not has_rsync: - print(f"** There is an rsync URL in line {line_count}, \"{this_url}\", but there is no rsync " - "on in the path on this system. Skipping.") - continue - - # Do the rsync - try: - subprocess.run(f"rsync -av {this_url} {str(target_dir)}", shell=True, check=True, - capture_output=True, text=True) - print(f"Successful rsync for {this_url}") - except Exception as e: - print(f"The rsync URL in line {line_count}, \"{this_url}\", failed with {e}. Skipping.") - continue - elif this_scheme in ("http", "https"): - # Get the last part of the URL to see if it is .git - if this_url.endswith(".git"): - handle_git(this_url, target_dir) - continue - try: - with urllib.request.urlopen(this_url) as f: - web_contents = f.read().decode('latin-1') - except Exception as e: - print(f"** Error reading {this_url}: {e}. Skipping.") - continue - # See if it nees to be updated - filename_part = this_url.split("/")[-1] - out_name_path = Path(target_dir) / filename_part - if out_name_path.exists(): - with out_name_path.open(mode="rt") as in_f: - to_compare = in_f.read() - if to_compare == web_contents: - print(f"No need to update {this_url}") - continue - # Do the update - try: - with out_name_path.open(mode="wt") as out_f: - out_f.write(web_contents) - print(f"Wrote out new version of {this_url}") - continue - except Exception as e: - print(f"** Error when writing {str(out_name_path)}: {e}. Skipping.") - continue - else: - print(f"** Line {line_count} has an unknown URL type: \"{this_url}\". Skipping.") - continue - - -# Main program here -if __name__ == "__main__": - # Determine if they have rsync - p = subprocess.run("which rsync", capture_output=True, shell=True) - if p.stdout: - has_rsync = True # It found a rsync - else: - has_rsync = False - - # Name of config file - config_name = "annotation-sources.txt" - directories = ["local-config", "default-config"] - # Location of config file - for directory in directories: - config_location = os.path.join(directory, config_name) - if os.path.exists(config_location): - try: - config_content = Path(config_location).open(mode="rt").read() - process_config_content(config_content) - exit() - except UnicodeDecodeError: - exit(f"{config_location} does not appear to be a text file. Exiting.") - # Process the config content on the file from local - exit(f"Could not find config file {config_name} in {str(directories)}. Exiting.") diff --git a/program/rfcfile.py b/program/rfcfile.py index b81ab7c..341bb8b 100644 --- a/program/rfcfile.py +++ b/program/rfcfile.py @@ -1,7 +1,6 @@ import os -from urllib.request import urlopen -import util # correct_path, debug, info, error +import util # correct_path, debug, info, error, urlopen ''' Download the RFC files for RFC annotations tools ''' @@ -24,7 +23,7 @@ def download_rfcs(rfc_list: list, directory: str = "."): else: util.info(f"Downloading {rfc.ljust(7)}... ", end='') try: - content = urlopen(f"https://www.rfc-editor.org/rfc/{rfc}.txt").read() + content = util.urlopen(f"https://www.rfc-editor.org/rfc/{rfc}.txt").read() if type(content) is bytes: util.info(f"Retrieved {str(len(content)).rjust(6)} bytes of data.") with open(filename, "wb") as f: @@ -34,4 +33,4 @@ def download_rfcs(rfc_list: list, directory: str = "."): util.error(f"got unexpected fetching response data of type {type(content)}.") except Exception as e: util.error(f"can't download text file for {rfc}: {e}.") - util.info(f"All RFC documents handled.") \ No newline at end of file + util.info(f"All RFC documents handled.") diff --git a/program/util.py b/program/util.py index e689c79..ff135ef 100644 --- a/program/util.py +++ b/program/util.py @@ -3,6 +3,7 @@ import re import sys from typing import Optional +import urllib.request ''' Utility functions for RFC annotations tools ''' @@ -193,3 +194,6 @@ def means_true(s: str) -> bool: def config_directories() -> [str]: return ["default-config"] if _running_in_test else ["local-config", "default-config"] + +def urlopen(url): + return urllib.request.urlopen(urllib.request.Request(url, headers ={'User-Agent': 'Mozilla/5.0'}))