diff --git a/grip/app.py b/grip/app.py
index 5c62e9d..3d52493 100644
--- a/grip/app.py
+++ b/grip/app.py
@@ -151,7 +151,11 @@ def __init__(self, source=None, auth=None, renderer=None, assets=None,
 
         # Initialize views
         self._styles_retrieved = False
+        self._scripts_retrieved = False
         self.before_request(self._retrieve_styles)
+        if self.renderer.user_content and self.render_math:
+            self.before_request(self._retrieve_assets)
+            self.render_math = False
         self.add_url_rule(asset_route, 'asset', self._render_asset)
         self.add_url_rule(asset_subpath, 'asset', self._render_asset)
         self.add_url_rule('/', 'render', self._render_page)
@@ -228,7 +232,8 @@ def _render_page(self, subpath=None):
             user_content=self.renderer.user_content,
             wide_style=self.render_wide, style_urls=self.assets.style_urls,
             styles=self.assets.styles, autorefresh_url=autorefresh_url,
-            render_math=self.render_math, math_jax_url=self.math_jax_url)
+            render_math=self.render_math, math_jax_url=self.math_jax_url,
+            script_urls=self.assets.script_urls)
 
     def _render_refresh(self, subpath=None):
         if not self.autorefresh:
@@ -362,6 +367,37 @@ def _retrieve_styles(self):
         if self.render_inline:
             self._inline_styles()
 
+    def _retrieve_assets(self):
+        """
+        Caches requested .js/.woff assets and retrieves the script URLs.
+
+        This is registered with before_request, so it runs on every
+        request: a request for a .js or .woff file under the asset URL
+        path is cached first, then the script URLs are retrieved from the
+        source once (guarded by _scripts_retrieved).
+        """
+        asset_url_path = url_for('asset')
+        # Only cache requests under the asset path; the extension check is
+        # a single tuple test so that 'and' / 'or' precedence cannot let
+        # .woff requests outside the asset path through.
+        if (request.path.startswith(asset_url_path) and
+                request.path.endswith((".js", ".woff"))):
+            path_start_index = len(request.url_root)+len(asset_url_path)-1
+            self.assets.cache_asset(request.url[path_start_index:])
+
+        if self._scripts_retrieved:
+            return
+        self._scripts_retrieved = True
+
+        try:
+            self.assets.retrieve_scripts(asset_url_path)
+        except Exception as ex:
+            if self.debug:
+                print(format_exc(), file=sys.stderr)
+            else:
+                print(' * Error: could not retrieve scripts:', ex,
+                      file=sys.stderr)
+
     def default_renderer(self):
         """
         Returns the default renderer using the current config.
diff --git a/grip/assets.py b/grip/assets.py index e65c428..5ef3149 100644 --- a/grip/assets.py +++ b/grip/assets.py @@ -18,7 +18,8 @@ from .constants import ( STYLE_URLS_SOURCE, STYLE_URLS_RES, STYLE_ASSET_URLS_RE, - STYLE_ASSET_URLS_SUB_FORMAT) + STYLE_ASSET_URLS_SUB_FORMAT, SCRIPT_FILENAMES_RES, + SCRIPT_URLS_SOURCE, SCRIPT_URLS_RES) from .vendor.six import add_metaclass @@ -34,6 +35,7 @@ def __init__(self, cache_path, style_urls=None, quiet=None): self.cache_path = cache_path self.style_urls = list(style_urls) if style_urls else [] self.styles = [] + self.script_urls = [] self.quiet = quiet def _strip_url_params(self, url): @@ -62,6 +64,14 @@ def retrieve_styles(self, asset_url_path): """ pass + @abstractmethod + def retrieve_scripts(self, asset_url_path): + """ + Get script URLs from the source HTML page and specified cached asset + URL path. + """ + pass + class GitHubAssetManager(ReadmeAssetManager): """ @@ -106,6 +116,40 @@ def _get_style_urls(self, asset_url_path): return urls + def _get_script_urls(self, asset_url_path): + """ + Gets the specified resource and parses all script URLs and their + assets in the form of the specified patterns. 
+ """ + # Check cache + if self.cache_path: + cached = self._get_cached_script_urls(asset_url_path) + # Skip fetching scripts if any are already cached + if cached: + return cached + + # Find script URLs + r = requests.get(SCRIPT_URLS_SOURCE) + if not 200 <= r.status_code < 300: + print('Warning: retrieving script gave status code', + r.status_code, file=sys.stderr) + urls = [] + content = r.text + for script_urls_re in SCRIPT_URLS_RES: + print(re.findall(script_urls_re, content)) + urls.extend(re.findall(script_urls_re, content)) + if not urls: + print('Warning: no script found - see https://github.com/joeyespo/' + 'grip/issues/265', file=sys.stderr) + + # Cache the scripts and their assets + if self.cache_path: + is_cached = self._cache_contents(urls, asset_url_path) + if is_cached: + urls = self._get_cached_script_urls(asset_url_path) + + return urls + def _get_cached_style_urls(self, asset_url_path): """ Gets the URLs of the cached styles. @@ -122,6 +166,22 @@ def _get_cached_style_urls(self, asset_url_path): for style in cached_styles if style.endswith('.css')] + def _get_cached_script_urls(self, asset_url_path): + """ + Gets the URLs of the cached scripts. 
+ """ + try: + cached_scripts = os.listdir(self.cache_path) + except IOError as ex: + if ex.errno != errno.ENOENT and ex.errno != errno.ESRCH: + raise + return [] + except OSError: + return [] + return [posixpath.join(asset_url_path, script) + for script in cached_scripts + if script.endswith('.js')] + def _cache_contents(self, style_urls, asset_url_path): """ Fetches the given URLs and caches their contents @@ -132,7 +192,7 @@ def _cache_contents(self, style_urls, asset_url_path): asset_urls = [] for style_url in style_urls: if not self.quiet: - print(' * Downloading style', style_url, file=sys.stderr) + print(' * Downloading style or script', style_url, file=sys.stderr) r = requests.get(style_url) if not 200 <= r.status_code < 300: print(' -> Warning: Style request responded with', @@ -193,3 +253,40 @@ def retrieve_styles(self, asset_url_path): if not asset_url_path.endswith('/'): asset_url_path += '/' self.style_urls.extend(self._get_style_urls(asset_url_path)) + + def cache_asset (self, asset_url): + if not asset_url.startswith('math_renderer/'): + asset_url = 'https://github.com/assets/%s' %asset_url + else: + asset_url = asset_url[len("math_renderer/"):] + asset_url = 'https://github.githubassets.com/static/%s' %asset_url + r = requests.get(asset_url, stream=True) + if not 200 <= r.status_code < 300: + print(' -> Warning: Asset request responded with', + r.status_code, file=sys.stderr) + print(' -> try to use the "--clear" option') + return + + filename = self.cache_filename(asset_url) + file_content = r.raw.read(decode_content=True) + + # Cache file if the download was successful + if not os.path.exists(self.cache_path): + os.makedirs(self.cache_path) + filename = safe_join(self.cache_path, filename) + with open(filename, 'wb') as f: + f.write(file_content) + + def retrieve_scripts(self, asset_url_path): + """ + Get script URLs from the source HTML page and specified cached + asset base URL. 
+ """ + if not asset_url_path.endswith('/'): + asset_url_path += '/' + urls = self._get_script_urls(asset_url_path) + script_urls = [] + for script_filename_re in SCRIPT_FILENAMES_RES: + script_urls += [script_url for script_url in urls if + re.search(script_filename_re, script_url)] + self.script_urls.extend(script_urls) diff --git a/grip/constants.py b/grip/constants.py index 7febf5e..339d49a 100644 --- a/grip/constants.py +++ b/grip/constants.py @@ -26,6 +26,16 @@ # default URL for loading MathJax DEFAULT_MATH_JAX_URL = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.2/es5/tex-chtml.min.js' +# script tag parsing +SCRIPT_URLS_SOURCE = 'https://github.com/joeyespo/grip' +SCRIPT_FILENAMES_RES = [ + r'''/wp-runtime-[0-9a-zA-Z]*\.js$''', + r'''/element-registry-[0-9a-zA-Z]*\.js$''' +] +SCRIPT_URLS_RES = [ + r'''\"(https://github.githubassets.com/assets/wp-runtime-[0-9a-zA-Z]*\.js)\"''', + r'''\"(https://github.githubassets.com/assets/element-registry-[0-9a-zA-Z]*\.js)\"''' +] # Style parsing STYLE_URLS_SOURCE = 'https://github.com/joeyespo/grip' diff --git a/grip/templates/index.html b/grip/templates/index.html index 19c8f42..6525579 100644 --- a/grip/templates/index.html +++ b/grip/templates/index.html @@ -33,8 +33,8 @@ {%- endblock -%} {%- block scripts -%} -{%- if render_math %} - - -{%- endif %} + + + {%- endif %} + {%- for script_url in script_urls %} + + {%- endfor %} + {%- endblock -%} {%- block page -%}