You will be redirected to the main page within 3 seconds. If not redirected, please click here.
diff --git a/.all-contributorsrc b/.all-contributorsrc
deleted file mode 100644
index bc6a9103..00000000
--- a/.all-contributorsrc
+++ /dev/null
@@ -1,45 +0,0 @@
-{
- "files": [
- "README.md"
- ],
- "imageSize": 100,
- "commit": false,
- "contributorsPerLine": 7,
- "projectName": "al-folio",
- "projectOwner": "alshedivat",
- "repoType": "github",
- "repoHost": "https://github.com",
- "badgeTemplate": "[core_contributors]: https://img.shields.io/badge/core_contributors-<%= contributors.length %>-orange.svg 'Number of core contributors'",
- "contributorTemplate": "\">\" width=\"<%= options.imageSize %>px;\" alt=\"\"/>
<%= contributor.name %>",
- "skipCi": true,
- "contributors": [
- {
- "login": "alshedivat",
- "name": "Maruan",
- "avatar_url": "https://avatars.githubusercontent.com/u/2126561?v=4",
- "profile": "http://maruan.alshedivat.com",
- "contributions": [
- "design",
- "code"
- ]
- },
- {
- "login": "rohandebsarkar",
- "name": "Rohan Deb Sarkar",
- "avatar_url": "https://avatars.githubusercontent.com/u/50144004?v=4",
- "profile": "http://rohandebsarkar.github.io",
- "contributions": [
- "code"
- ]
- },
- {
- "login": "pourmand1376",
- "name": "Amir Pourmand",
- "avatar_url": "https://avatars.githubusercontent.com/u/32064808?v=4",
- "profile": "https://amirpourmand.ir",
- "contributions": [
- "code"
- ]
- }
- ]
-}
diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 24244739..00000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-_config.yml merge=ours
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
deleted file mode 100644
index c78502f4..00000000
--- a/.github/FUNDING.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-# These are supported funding model platforms
-
-github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
-patreon: # Replace with a single Patreon username
-open_collective: # Replace with a single Open Collective username
-ko_fi: alshedivat
-tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-otechie: # Replace with a single Otechie username
-custom: # ['https://www.buymeacoffee.com/TkFxuKo']
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index 511f5851..00000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-name: Bug report
-about: Create a report to help us improve
-title: ''
-labels: bug
-assignees: ''
-
----
-
-**Acknowledge the following**
-- [ ] I carefully read and followed the [Getting Started](https://github.com/alshedivat/al-folio#getting-started) guide.
-- [ ] I read through [FAQ](https://github.com/alshedivat/al-folio#faq) and searched through the [past issues](https://github.com/alshedivat/al-folio/issues), none of which addressed my issue.
-- [ ] The issue I am raising is a potential bug in al-folio and not just a usage question.
[For usage questions, please post in the [Discussions](https://github.com/alshedivat/al-folio/discussions) instead of raising an issue.]
-
-**Describe the bug**
-A clear and concise description of what the bug is.
-
-**To Reproduce**
-Steps to reproduce the behavior:
-1. Go to '...'
-2. Click on '....'
-3. Scroll down to '....'
-4. See error
-
-**Expected behavior**
-A clear and concise description of what you expected to happen.
-
-**Screenshots**
-If applicable, add screenshots to help explain your problem.
-
-**System (please complete the following information):**
- - OS: [e.g. iOS]
- - Browser (and its version) [e.g. chrome, safari]
- - Jekyll version [e.g. 3.8.7]
-- Ruby version [e.g. 2.6.5]
-
-**Additional context**
-Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
deleted file mode 100644
index 11fc491e..00000000
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-name: Feature request
-about: Suggest an idea for this project
-title: ''
-labels: enhancement
-assignees: ''
-
----
-
-**Is your feature request related to a problem? Please describe.**
-A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
-
-**Describe the solution you'd like**
-A clear and concise description of what you want to happen.
-
-**Describe alternatives you've considered**
-A clear and concise description of any alternative solutions or features you've considered.
-
-**Additional context**
-Add any other context or screenshots about the feature request here.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
deleted file mode 100644
index 9ae75b4c..00000000
--- a/.github/pull_request_template.md
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-## OpenReview Submission Thread
-
-
-
-## Checklist before requesting a review
-
-- [ ] I am opening a pull request against the `accepted` branch of the `staging` repo.
-- [ ] I have de-anonymized my post, added author lists, etc.
-- [ ] My post matches the formatting requirements
- - [ ] I have a short 2-3 sentence abstract in the `description` field of my front-matter ([example](https://github.com/iclr-blogposts/staging/blob/aa15aa3797b572e7b7bb7c8881fd350d5f76fcbd/_posts/2022-12-01-distill-example.md?plain=1#L4-L5))
- - [ ] I have a table of contents, formatted using the `toc` field of my front-matter ([example](https://github.com/iclr-blogposts/staging/blob/aa15aa3797b572e7b7bb7c8881fd350d5f76fcbd/_posts/2022-12-01-distill-example.md?plain=1#L33-L42))
- - [ ] My bibliography is correctly formatted, using a `.bibtex` file as per the sample post
-
-## Changes implemented in response to reviewer feedback
-
-- [ ] Tick this box if you received a conditional accept
-- [ ] I have implemented the necessary changes in response to reviewer feedback (if any)
-
-
-
-## Any other comments
diff --git a/.github/stale.yml b/.github/stale.yml
deleted file mode 100644
index 8ec2004d..00000000
--- a/.github/stale.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Number of days of inactivity before an issue becomes stale
-daysUntilStale: 60
-# Number of days of inactivity before a stale issue is closed
-daysUntilClose: 7
-# Issues with these labels will never be considered stale
-exemptLabels:
- - pinned
- - security
- - enhancement
-# Label to use when marking an issue as stale
-staleLabel: wontfix
-# Comment to post when marking an issue as stale. Set to `false` to disable
-markComment: >
- This issue has been automatically marked as stale because it has not had
- recent activity. It will be closed if no further activity occurs. Thank you
- for your contributions.
-# Comment to post when closing a stale issue. Set to `false` to disable
-closeComment: false
diff --git a/.github/workflows/deploy-docker-tag.yml b/.github/workflows/deploy-docker-tag.yml
deleted file mode 100644
index 3e6b6a3a..00000000
--- a/.github/workflows/deploy-docker-tag.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-name: Docker Image CI (Upload Tag)
-
-on:
- push:
- tags:
- - 'v*'
-
-jobs:
-
- build:
-
- runs-on: ubuntu-latest
-
- steps:
- - name: Checkout
- uses: actions/checkout@v2
- - name: Buildx
- uses: docker/setup-buildx-action@v1
-
- -
- name: Docker meta
- id: meta
- uses: docker/metadata-action@v4
- with:
- images: amirpourmand/al-folio
-
- - name: Login
- uses: docker/login-action@v1
- with:
- username: ${{ secrets.DOCKER_USERNAME }}
- password: ${{ secrets.DOCKER_PASSWORD }}
-
- - name: Build and push
- uses: docker/build-push-action@v3
- with:
- context: .
- push: ${{ github.event_name != 'pull_request' }}
- tags: ${{ steps.meta.outputs.tags }}
- labels: ${{ steps.meta.outputs.labels }}
-
diff --git a/.github/workflows/deploy-image.yml b/.github/workflows/deploy-image.yml
deleted file mode 100644
index b747dfc1..00000000
--- a/.github/workflows/deploy-image.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-name: Docker Image CI
-
-on:
- push:
- branches: [ master ]
-
-jobs:
-
- build:
-
- runs-on: ubuntu-latest
- if: github.repository_owner == 'alshedivat'
-
- steps:
- - name: Checkout
- uses: actions/checkout@v2
- - name: Buildx
- uses: docker/setup-buildx-action@v1
-
- - name: Login
- uses: docker/login-action@v1
- with:
- username: ${{ secrets.DOCKER_USERNAME }}
- password: ${{ secrets.DOCKER_PASSWORD }}
-
- - name: Build and push
- uses: docker/build-push-action@v2
- with:
- context: .
- push: true
- tags: amirpourmand/al-folio
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
deleted file mode 100644
index dbe26a88..00000000
--- a/.github/workflows/deploy.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-name: deploy
-
-on:
- push:
- branches:
- - master
- - main
- pull_request:
- branches:
- - master
- - main
- workflow_dispatch: {}
-
-jobs:
- deploy:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout code
- uses: actions/checkout@v3
- - name: Setup Ruby
- uses: ruby/setup-ruby@v1
- with:
- ruby-version: '3.0.2'
- bundler-cache: true
- - name: Install deps
- run: |
- npm install -g mermaid.cli
- - name: Setup deploy options
- id: setup
- run: |
- git config --global user.name "GitHub Action"
- git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
- if [[ ${GITHUB_REF} = refs/pull/*/merge ]]; then # pull request
- echo "SRC_BRANCH=${GITHUB_HEAD_REF}" >> $GITHUB_OUTPUT
- echo "NO_PUSH=--no-push" >> $GITHUB_OUTPUT
- elif [[ ${GITHUB_REF} = refs/heads/* ]]; then # branch, e.g. master, source etc
- echo "SRC_BRANCH=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT
- fi
- echo "DEPLOY_BRANCH=gh-pages" >> $GITHUB_OUTPUT
- - name: Deploy website
- run: yes | bash bin/deploy --verbose ${{ steps.setup.outputs.NO_PUSH }}
- --src ${{ steps.setup.outputs.SRC_BRANCH }}
- --deploy ${{ steps.setup.outputs.DEPLOY_BRANCH }}
diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 00000000..e69de29b
diff --git a/404.html b/404.html
index 0da4ee0b..f558c339 100644
--- a/404.html
+++ b/404.html
@@ -1,9 +1 @@
----
-layout: page
-permalink: /404.html
-title: "Page not found"
-description: "Looks like there has been a mistake. Nothing exists here."
-redirect: true
----
-
-
You will be redirected to the main page within 3 seconds. If not redirected, please click here.
+You will be redirected to the main page within 3 seconds. If not redirected, please click here.
{{ content.name }} | -{{ content.value }} | -
{{ item.date | date: "%b %-d, %Y" }} | -
- {% if item.inline -%}
- {{ item.content | remove: ' ' | remove: ' ' | emojify }} - {%- else -%} - {{ item.title }} - {%- endif %} - |
-
---|
No news so far...
- {%- endif %} -{{ project.description }}
-{{ project.description }}
-{{ page.subtitle }}
-an archive of posts in this category
-{{ post.date | date: "%b %-d, %Y" }} | -- {{ post.title }} - | -
---|
an archive of posts with this tag
-{{ post.date | date: "%b %-d, %Y" }} | -- {{ post.title }} - | -
---|
an archive of posts from this year
-{{ post.date | date: "%b %-d, %Y" }} | -- {{ post.title }} - | -
---|
{{ page.description }}
-{{ page.description }}
-
- PLACEHOLDER FOR ACADEMIC ATTRIBUTION
-
-
- BibTeX citation
-
- PLACEHOLDER FOR BIBTEX
-
- -Weight decay is widely used in networks with Batch Normalization (Ioffe & Szegedy, -2015). In principle, weight decay regularization should have no effect in this case, since one -can scale the weights by a small factor without changing the network’s predictions. Hence, it -does not meaningfully constrain the network’s capacity. - -—Zhang et al., 2019 -- -* However, the experiments of the paper show that weight decay on layers with `(BN)` can nevertheless improve accuracy. The authors argue that this is due to an effectively larger learning rate. - -This blog post will summarize the development of weight decay specifically for Adam. -We try to shed some light on the following questions: - -1. What is the difference between Adam and its weight decay version AdamW? Does the existing literature give a clear answer to the question when (and why) AdamW performs better? -2. Is the weight decay mechanism of AdamW just *one more trick* or can we actually motivate it from an optimization perspective? -3. The last section is somewhat explorational: could we come up with different formulas for a weight decay version of Adam? By doing so, we will see that AdamW already combines several advantages for practical use. - - -### Notation - -We denote by $$\alpha > 0$$ the initial learning rate. We use $$\eta_t > 0$$ for a learning rate schedule multiplier. By this, the effective learning rate in iteration $$t$$ is $$\alpha \eta_t$$. We use $$\lambda > 0$$ for the weight decay parameter. - -## Adam - -Adam uses an exponentially moving average (EMA) of stochastic gradients, typically denoted by $$m_t$$, and of the elementwise squared gradients, denoted by $$v_t$$. - -We denote with $$\hat m_t$$ and $$\hat v_t$$ the EMA estimates with bias correction (see
- We provide empirical evidence that our proposed modification decouples the optimal choice of weight decay factor from the setting of the learning rate for both standard SGD and Adam [...]. - —Loshchilov and Hutter, 2019 -- -What the authors mean by *decoupling* is that if we plot the test accuracy as a heatmap of learning rate and weight decay, the areas with high accuracy are more rectangular; the best learing rate is not too sensitive to the choice of weight decay. We illustrate this conceptually in the plot below which is inspired by Figure 2 in
-Improving PDE solvers has trickle down benefits to a vast range of other fields. -- -Partial differential equations (PDEs) play a crucial role in modeling complex systems and understanding how they change over time and in space. - -They are used across physics and engineering, modeling a wide range of physical phenomena like heat transfer, sound waves, electromagnetism, and fluid dynamics, but they can also be used in finance to model the behavior of financial markets, in biology to model the spread of diseases, and in computer vision to model the processing of images. - -They are particularly interesting in deep learning! - -
-Ordinary differential equations (ODEs) describe how a function changes with respect to a single independent variable and its derivatives. In contrast, PDEs are mathematical equations that describe the behavior of a dependent variable as it changes with respect to multiple independent variables and their derivatives. -
--Formally, for one time dimension and possibly multiple spatial dimensions denoted by \(\textbf{x}=[x_{1},x_{2},x_{3},\text{...}]^{\top} \in \mathbb{X}\), a general (temporal) PDE may be written as -
--$$\partial_{t}\textbf{u}= F\left(t, \textbf{x}, \textbf{u},\partial_{\textbf{x}}\textbf{u},\partial_{\textbf{xx}}\textbf{u},\text{...}\right) \qquad (t,\mathbf{x}) \in [0,T] \times \mathbb{X}$$ -
--The \(\partial\) is a partial derivative operator which can be understood as "a small change in". For example, the \(\partial_{t}\textbf{u}\) term refers to how much an infinitesmally small change in \(t\) changes \(\textbf{u}\). Below is an explicit definition for some arbitrary function \(f(x,y)\): - -$$\frac{\partial f(x,y)}{\partial x} = \lim_{h \to 0} \frac{f(x+h,y) - f(x,y)}{h}$$ - -
-- - Many equations are solutions to such PDEs alone. For example, the wave equation is given by \(\partial_{tt}u = \partial_{xx}u\). You will find that any function in the form \(u(x,t)=F(x-ct)+\) \(G(x+ct)\) is a potential solution. Initial conditions are used to specify how a PDE "starts" in time, and boundary conditions determine the value of the solution at the boundaries of the region where the PDE is defined. - -
--\(\partial_{t} \mathbf{u} + \nabla \cdot \mathbf{J}(\mathbf{u}) = 0\) -
- -\(J\) is the flux, or the amount of some quantity that is flowing through a region at a given time
-\(\nabla \cdot J\) is the divergence of the flux, or the amount of outflow of the flux at a given point
--Very few PDEs have analytical solutions, so numerical methods have been developed to approximate PDE solutions over a wider range of potential problems. -- -#### Numerical Methods - -Often, approaches for temporal PDEs follow the method of lines (MOL). - -Every point of the discretization is then thought of as a separate ODE evolving in time, enabling the use of ODE solvers such as Runge-Kutta methods. - -
-In the most basic case (a regular grid), arbitrary spatial and temporal resolutions \(\mathbf{n_{x}}\) and \(n_{t}\) can be chosen and thus used to create a grid where \(\mathbf{n_{x}}\) is a vector containing a resolution for each spatial dimension. -
--The domain may also be irregularly sampled, resulting in a grid-free discretization. This is often the case with real-world data that comes from scattered sensors, for example. -
-Finite difference methods (FDMs) or any other discretization technique can be used to discretize the time domain. -
--One direction of ongoing research seeks to determine discretization methods which can result in more efficient numerical solvers (for example, take larger steps in flatter regions and smaller steps in rapidly changing regions). -
- --A popular choice when using a gridded discretization is the finite difference method (FDM). Spatial derivative operators are replaced by a stencil which indicates how values at a finite set of neighboring grid points are combined to approximate the derivative at a given position. This stencil is based on the Taylor series expansion. -
- --{% include figure.html path="assets/img/2023-05-01-autoregressive-neural-pde-solver/fdm_animation.gif" style="max-width:690px;height:auto;" %} -
- - - -
-The finite volume method (FVM) is another approach which works for irregular geometries. Rather than requiring a grid, the computation domain can be divided into discrete, non-overlapping control volumes used to compute the solution for that portion
-For every control volume, a set of equations describing the balance of some physical quantities (in essence, estimating the flux at control volume boundaries) can be solved which results in the approximated spatial derivative. -
- --While this method only works for conservation form equations, it can handle complex problems with irregular geometries and fluxes that are difficult to handle with other numerical techniques such as the FDM. -
-
-In the pseudospectral method (PSM), PDEs are solved pointwise in physical space by using basis functions to approximate the spatial derivatives
-These methods are well-suited for solving problems with smooth solutions and periodic boundary conditions, but their performance drops for irregular or non-smooth solutions, as well as problems with more degrees of freedom where their global nature results in high dimensional dense matrix computations. -
-User | -Structural | -Implementational | -
---|---|---|
Computation efficiency, computational cost, accuracy, guarantees (or uncertainty estimates), generalization across PDEs | -Spatial and temporal resolution, boundary conditions, domain sampling regularity, dimensionality | -Stability over long rollouts, preservation of invariants | -
-The countless combinations of requirements resulted in what Bartels defines as a splitter field
-These methods, while effective and mathematically proven, often come at high computation costs. Taking into account that PDEs often exhibit chaotic behaviour and are sensitive to any changes in their parameters, re-running a solver every time a coefficient or boundary condition changes in a single PDE can be computationally expensive. -
--One key example which limits grid-based classical solvers is the Courant-Friedrichs-Lewy (CFL) condition, which states that the maximum time step size should be proportional to the minimum spatial grid size. According to this condition, as the number of dimensions increases, the size of the temporal step must decrease and therefore numerical solvers become very slow for complex PDEs. -
-Algorithm | -Equation | -Boundary conditions | -Complexity | -
---|---|---|---|
Classical FDM/FEM/FVM | -general | -general | -poly\(((\frac{1}{\varepsilon})^{d})\) | -
Adaptive FDM/FEM |
- general | -general | -poly\(((\log(\frac{1}{\varepsilon}))^{d})\) | -
Spectral method |
- general | -general | -poly\(((\log(\frac{1}{\varepsilon}))^{d})\) | -
Sparse grid FDM/FEM |
- general | -general | -poly\(((\frac{1}{\varepsilon})(\log(\frac{1}{\varepsilon}))^{d})\) | -
Sparse grid spectral method |
- elliptic | -general | -poly\((\log(\frac{1}{\varepsilon})(\log \log(\frac{1}{\varepsilon}))^{d})\) | -
-Neural solvers offer some very desirable properties that may serve to unify some of this splitter field. Neural networks can learn and generalize to new contexts such as different initial/boundary conditions, coefficients, or even different PDEs entirely
-Though most methods lie along a spectrum from classical leaning to end-to-end neural, a naive yet illustrative categorization into three groupings is shown below. -
--{% include figure.html path="assets/img/2023-05-01-autoregressive-neural-pde-solver/PDEchart.png" style="max-width:690px;height:auto;" %} -
- -#### Fully Neural/Universal Function Approximators - -The term fully neural here refers to methods which rely on the universal function approximation theory such that a sufficiently complex network can represent any arbitrary function. Many common fully neural methods are also known as neural operators which model the solution of a PDE as an operator that maps inputs to outputs. The problem is set such that a neural operator $$\mathcal{M}$$ satisfies $$\mathcal{M}(t,\mathbf{u}^{0}) = \mathbf{u}(t)$$ where $$\mathbf{u}^{0}$$ are the initial conditions- These global integral operators (implemented as Fourier space convolutional operators) are combined with local nonlinear activation functions, resulting in an architecture which is highly expressive yet computationally efficient, as well as being resolution-invariant. -
-
- While the vanilla FNO required the input function to be defined on a grid due to its reliance on the FFT, further work developed mesh-independent variations as well
- - Convolution Theorem - -
-- The Fourier transform of the convolution of two signals is equal to the pointwise product of their individual Fourier transforms -
--{% include figure.html path="assets/img/2023-05-01-autoregressive-neural-pde-solver/FNO.png" style="max-width:80%;height:auto;" %} -
- - --Neural operators are able to operate on multiple domains and can be completely data-driven. -
--However, these models do not tend to predict out-of-distribution \(t\) and are therefore limited when dealing with temporal PDEs. Another major barrier is their relative lack of interpretability and guarantees compared to classical solvers. -
- -#### Neural-Augmented Classical Methods - -A parallel line of research involves using deep learning as a tool to improve classical numerical methods for solving PDEs. One avenue involves modifying existing iterative methods: while neural operator methods directly mapped inputs to outputs, autoregressive methods take an iterative approach instead. For example, iterating over time results in a problem such as $$\mathbf{u}(t+\Delta t) = \mathcal{A}(\Delta t, \mathbf{u}(t))$$ where $$\mathcal{A}$$ is some temporal update- Other autoregressive models include PixelCNN for images, WaveNet for audio, and the Transformer for text. -
--The loss function is used to evaluate the difference between the temporal update and the expected next state, and the overall one-step loss is calculated as the expected value of this loss over all time-steps and all possible next states. -
--\(L_{\text{stability}} = \mathbb{E}_{k}\mathbb{E}_{\mathbf{u^{k+1}|\mathbf{u^{k},\mathbf{u^{k} \sim p_{k}}}}}[\mathbb{E}_{\epsilon | \mathbf{u}^{k}} [\mathcal{L}(\mathcal{A}(\mathbf{u}^{k}+\) \(\epsilon\) \()),\mathbf{u}^{k+1}]]\) -
- --\(L_{\text{total}} = L_{\text{one-step}} + L_{\text{stability}}\) -
- --The stability loss is largely based off the one-step loss, but now assumes that the temporal update uses noisy data. -
- --The pushforward trick lies in the choice of \(\epsilon\) such that \(\mathbf{u}^{k}+\epsilon = \mathcal{A}(\mathbf{u}^{k-1})\), similar to the test time distribution. Practically, it is implemented to be noise from the network itself so that as the network improves, the loss decreases. -
- -
-Necessarily, the noise of the network must be known or calculated to implement this loss term. So, the model is unrolled for 2 steps but only backpropagated over the most recent unroll step, which already has the neural network noise
-While the network could be unrolled during training, this not only slows the training down but also might result in the network learning shortcuts across unrolled steps. -
- -**Temporal bundling** - -Classical Numerical Method | -MP-PDE Network Component | -
---|---|
Partitioning the problem onto a grid | -Encoder Encodes a vector of solutions into node embeddings |
-
Estimating the spatial derivatives | -Processor Estimates spatial derivatives via message passing |
-
Time updates | -Decoder Combines some representation of spatial derivatives smoothed into a time update |
-
-The encoder is implemented as a two-layer MLP which computes an embedding for each node \(i\) to cast the data to a non-regular integration grid: -
--The node embeddings from the encoder are then used in a message passing GNN. The message passing algorithm, which approximates spatial derivatives, is run \(M\) steps using the following updates: -
- -
-Bar-Sinai et al. explores the relationship between FDM and FVM as used in the method of lines
-\(\partial^{(n)}_{x}u \approx \sum_{i} a^{(n)}_{i} u_{i}\) -
- --for some precomputed coefficients \(a^{(n)}_{i}\). The right hand side parallels the message passing scheme, which aggregates the local difference (\(\mathbf{u}_{i}^{k-K:k}-\mathbf{u}_{j}^{k-K:k}\) in the edge update) and other (learned) embeddings over neighborhoods of nodes. -
- -
-This relationship gives an intuitive understanding of the message passing GNN, which mimics FDM for a single layer, FVM for two layers, and WENO5 for three layers
-While the interpretation is desirable, how far this holds in the actual function of the MP-GNN is harder to address. The concepts of the nodes as integration points and messages as local differences break down as the nodes and edges update. In addition, the furthest node that contributes a message from for any point is at \(n\) edges away for the \(n^{th}\) layer (or a specified limit). This results in a very coarse and potentially underinformed approximation for the first layer which is then propagated to the next layers. However, both the updates use two layer MLPs which (although abstracting away from their respective interpretations) may in effect learn optimal weightings to counterbalance this. -
--The approximated spatial derivatives are then combined and smoothed using a 1D CNN which outputs a bundle of next time steps (recall temporal bundling) \(\mathbf{d}_{i}\). The solution is then updated: -
- --\(\mathbf{u}^{k+l}_{i} = u^{k}_{i} + (t_{k+l}-t_{k})\mathbf{d}^{l}_{i}\) -
- -
-Some precedence is seen, for example, in classical linear multistep methods which (though effective) face stability concerns. Since the CNN is adaptive, it appears that it avoids this issue
-Accumulated error: \(\frac{1}{n_{x}} \sum_{x,t} MSE\) -
--Runtime (s): Measured time taken to run for a given number of steps. -
- --As a general neural PDE solver, the MP-GNN surpasses even the current state-of-the-art FNO. -- -For example, after training a neural model and setting up an instance of MOL, this is a brief comparison of how they can generalize without re-training. - -
Generalization to... | -MP-GNN | -FNO | -Classical (MOL) | -
---|---|---|---|
New PDEs | -Yes | -No | -No | -
Different resolutions | -Yes | -Yes | -No (unless downsampling) | -
Changes in PDE parameters | -Yes | -Yes | -Sometimes | -
Non-regular grids | -Yes | -Some | -Yes (dependent on implementation) | -
Higher dimensions | -Yes | -No | -No | -
- | Accumulated Error | -Runtime [s] | -|||||
---|---|---|---|---|---|---|---|
- \(\quad (n_{t},n_{x})\) - | -WENO5 | -FNO-RNN | -FNO-PF | -MP-PDE | -WENO5 | -MP-PDE | -|
E1 | -(250,100) | -2.02 | -11.93 | -0.54 | -1.55 | -1.9 | -0.09 | -
E1 | -(250, 50) | -6.23 | -29.98 | -0.51 | -1.67 | -1.8 | -0.08 | -
E1 | -(250, 40) | -9.63 | -10.44 | -0.57 | -1.47 | -1.7 | -0.08 | -
E2 | -(250, 100) | -1.19 | -17.09 | -2.53 | -1.58 | -1.9 | -0.09 | -
E2 | -(250, 50) | -5.35 | -3.57 | -2.27 | -1.63 | -1.8 | -0.09 | -
E2 | -(250, 40) | -8.05 | -3.26 | -2.38 | -1.45 | -1.7 | -0.08 | -
E3 | -(250, 100) | -4.71 | -10.16 | -5.69 | -4.26 | -4.8 | -0.09 | -
E3 | -(250, 50) | -11.71 | -14.49 | -5.39 | -3.74 | -4.5 | -0.09 | -
E3 | -(250, 40) | -15.97 | -20.90 | -5.98 | -3.70 | -4.4 | -0.09 | -
Shorthand | -Meaning | -
---|---|
E1 | -Burgers' equation without diffusion | -
E2 | -Burgers' equation with variable diffusion | -
E3 | -Mixed equation, see below | -
\(n_{t}\) | -Temporal resolution | -
\(n_{x}\) | -Spatial resolution | -
WENO5 | -Weighted Essentially Non-Oscillatory (5th order) | -
FNO-RNN | -Recurrent variation of FNO from original paper | -
FNO-PF | -FNO with the pushforward trick added | -
MP-PDE | -Message passing neural PDE solver | -
-The authors form a general PDE in the form -
- --\([\partial_{t}u + \partial_{x}(\alpha u^{2} - \beta \partial_{x} u + \gamma \partial_{xx} u)](t,x) = \delta (t,x)\) -
- --\(u(0,x) = \delta(0,x)\) -
- --such that \(\theta_{PDE} = (\alpha, \beta, \gamma)\) and different combinations of these result in the heat equation, Burgers' equation, and the KdV equation. \(\delta\) is a forcing term, allowing for greater variation in the equations being tested. -
- --The pushforward trick is successful in mitigating error accumulation. --Comparing the accumulated errors of FNO-RNN and the FNO-PF across all experiments highlights the advantage of the pushforward trick. While the MP-PDE outperforms all other tested methods in the two generalization experiments **E2** and **E3**, the FNO-PF is most accurate for **E1**. - -When solving a single equation, the FNO likely performs better, though both FNO-PF and MP-PDE methods outperform WENO5. - -
-Neural solvers are resolution-invariant. --As $$n_{x}$$ is decreased, WENO5 performs increasingly worse whereas all the neural solvers remain relatively stable. -
-Neural solver runtimes are constant to resolution. --Additionally, the runtimes of WENO5 decrease (likely proportionally) since fewer steps require fewer calculations, but the MP-PDE runtimes again appear relatively stable. - -### Comparing Interpretations - -The way the MP-PDE is constructed parallels how both GRAND and the PDE-GCN are built. All three architectures follow a basic premise of mirroring the MOL and describe certain mechanisms in their respective systems which mimic spatial discretisations and temporal discretisations. - -The spatial derivative is discretized by a GNN in the MP-PDE and by the message passing algorithm (consisting of node and edge updates within one layer of a GNN) in the GRAND and PDE-GCN. In the MP-PDE, the spatial derivatives are in effect parameterized by the node and edge updates (the former which Brandstetter et al. highlight takes the difference in solutions $$u_{i}=u_{j}$$) detailed above, both of which are generic MLPs. In comparison, both GRAND and PDE-GCN (using the diffusion variant) come to comparable formulas when discretising using the forward Euler method. - -The GRAND paper derives the following, where $$\tau$$ is a temporal step, $$\mathbf{x}$$ is the diffusion equation, and $$\mathbf{A}$$ is the attention matrix
-Fig.1 Example of MAML and a class label permutation
-Fig.2 Overview of UnicornMAML
-Fig.3 MAML with the zeroing trick applied
- Gradient Descent with Momentum
- Input: starting guess \(\xx_0\), step-size \(\step > 0\) and momentum
- parameter \(\mom \in (0, 1)\).
- \(\xx_1 = \xx_0 - \dfrac{\step}{\mom+1} \nabla f(\xx_0)\)
- For \(t=1, 2, \ldots\) compute
- \begin{equation}\label{eq:momentum_update}
- \xx_{t+1} = \xx_t + \mom(\xx_{t} - \xx_{t-1}) - \step\nabla
- f(\xx_t)
- \end{equation}
-
-Consider the following polynomial \(P_t\) of degree \(t\), defined recursively as: -\begin{equation} -\begin{split} -&P_{t+1}(\lambda) = (1 + \mom - \step \lambda ) P_{t}(\lambda) - \mom P_{t-1}(\lambda)\\ -&P_1(\lambda) = 1 - \frac{\step}{1 + \mom} \lambda\,, ~ P_0(\lambda) = 1\,,~ -\end{split}\label{eq:def_residual_polynomial2} -\end{equation} -Then we can write the suboptimality at iteration \(t\) as -\begin{equation} -\xx_t - \xx^\star = P_t(\HH) \left( \xx_0 - \xx^\star \right) \,, -\end{equation} -where \(P_t(\HH)\) is the matrix obtained from evaluating the (originally real-valued) polynomial \(P_t\) at the matrix \(\HH\). -
- - -This last identity will allow us to easily compute convergence rates. In particular, plugging it into the definition of the convergence rate \eqref{eq:convergence_rate} we get that the rate is determined by the absolute value of the residual polynomial over the $$[\mu, L]$$ interval: -\begin{align} -r_t &= \sup_{\xx_0, \text{eigs}(\HH) \in [\mu, L]} \frac{\\|P_t(\HH) \left( \xx_0 - \xx^\star \right)\\|}{\\|\xx_{0} - \xx^\star\\|} \\\ -& = \sup_{\text{eigs}(\HH) \in [\mu, L]} \\|P_t(\HH)\\| \\\ -& = \sup_{\lambda \in [\mu, L]} \lvert P_t(\lambda) \rvert\,. -\end{align} -We've now reduced the problem of computing the convergence rate to the problem of computing the absolute value of a polynomial over a given interval. This is a problem that has been extensively studied in the theory of orthogonal polynomials. In particular, we'll use known bounds on Chebyshev polynomials of the first and second kind, as the residual polynomial of momentum can be written as a convex combination of these two polynomials. This fact is proven in the next result, which is a generalization of equation (II.29) in (Rutishauser 1959).-The residual polynomial of momentum can be written in terms of Chebyshev polynomials of the first and second kind as -\begin{align} -P_t(\lambda) = \mom^{t/2} \left( {\small\frac{2\mom}{1+\mom}}\, T_t(\sigma(\lambda)) + {\small\frac{1 - \mom}{1 + \mom}}\,U_t(\sigma(\lambda))\right)\,. -\end{align} -where \(\sigma(\lambda) = {\small\dfrac{1}{2\sqrt{\mom}}}(1 + \mom - \step\,\lambda)\,\) is a linear function that we'll refer to as the link function and \(T_t\) and \(U_t\) are the Chebyshev polynomials of the first and second kind respectively. -
- -- Let's denote by \(\widetilde{P}_t\) the right hand side of the above equation, that is, - \begin{equation} - \widetilde{P}_{t}(\lambda) \defas \mom^{t/2} \left( {\small\frac{2\mom}{1 + \mom}}\, T_t(\sigma(\lambda)) + {\small\frac{1 - \mom}{1 + \mom}}\, U_t(\sigma(\lambda))\right)\,. - \end{equation} - Our goal is to show that \(P_t = \widetilde{P}_t\) for all \(t\). -
-- For \(t=1\), \(T_1(\lambda) = \lambda\) and \(U_1(\lambda) = 2\lambda\), so we have - \begin{align} - \widetilde{P}_1(\lambda) &= \sqrt{\mom} \left(\tfrac{2\mom}{1 + \mom} \sigma(\lambda) + \tfrac{1 - \mom}{1 + \mom} 2\sigma(\lambda)\right)\\ - &= \frac{2 \sqrt{\mom}}{1 + \mom} \sigma(\lambda) = 1 - \frac{\step}{1 + \mom} \lambda\,, - \end{align} - which corresponds to the definition of \(P_1\) in \eqref{eq:def_residual_polynomial2}. -
-- Assume it's true for any iteration up to \(t\), we will show it's true for \(t+1\). Using the three-term recurrence of Chebyshev polynomials we have - \begin{align} - &\widetilde{P}_{t+1}(\lambda) = \mom^{(t+1)/2} \left( {\small\frac{2 \mom}{1 + \mom}}\, - T_{t+1}(\sigma(\lambda)) - + {\small\frac{1 - \mom}{1 + \mom}}\, U_{t+1}(\sigma(\lambda))\right) \\ - &= \mom^{(t+1)/2} \Big( {\small\frac{2 - \mom}{1 + \mom}}\, - (2 \sigma(\lambda) T_{t}(\sigma(\lambda)) - T_{t-1}(\sigma(\lambda))) \nonumber\\ - &\qquad\qquad - + {\small\frac{1 - \mom}{1 + \mom}}\, (2 \sigma(\lambda) - U_{t}(\sigma(\lambda)) - U_{t-1}(\sigma(\lambda)))\Big)\\ - &= 2 \sigma(\lambda) \sqrt{\mom} P_t(\lambda) - \mom P_{t-1}(\lambda)\\ - &= (1 + \mom - \step \lambda) P_t(\lambda) - - \mom P_{t-1}(\lambda) - \end{align} - where the third identity follows from grouping polynomials of same degree and the - induction hypothesis. The last expression is the recursive definition of \(P_{t+1}\) in - \eqref{eq:def_residual_polynomial2}, which proves the desired \(\widetilde{P}_{t+1} = - {P}_{t+1}\). -
- - -- The asymptotic rate in the robust region is \(r_{\infty} = \sqrt{\mom}\). -
- -This is nothing short of magical. It would seem natural –and this will be the case in other regions– that the speed of convergence should depend on both the step-size and the momentum parameter. Yet, this result implies that it's not the case in the robust region. In this region, the convergence only depends on the momentum parameter $\mom$. Amazing.- In the lazy region the asymptotic rate is \(r_{\infty} = \sqrt{\mom}\left(|\sigma(\lmin)| + \sqrt{\sigma(\lmin)^2 - 1} \right)\). -
- -Unlike in the robust region, this rate depends on both the step-size and the momentum parameter, which enters in the rate through the link function $$\sigma$$. This can be observed in the color plot of the asymptotic rate - - - -{% include figure.html path="assets/img/2023-05-01-hitchhikers-momentum/rate_lazy_region.png" class="img-fluid" %} - - -## Knife's Edge - - -The robust and lazy region occupy most (but not all!) of the region for which momentum converges. There's a small region that sits between the lazy and robust regions and the region where momentum diverges. We call this region the Knife's edge - -For parameters not in the robust or lazy region, we have that $$|\sigma(L)| > 1$$ and $$|\sigma(L)| > |\sigma(\lmin)|$$. Using the asymptotics of Chebyshev polynomials as we did in the previous section, we have that the asymptotic rate is $$\sqrt{\mom}\left(|\sigma(L)| + \sqrt{\sigma(L)^2 - 1} \right)$$. The method will only converge when this asymptotic rate is below 1. Enforcing this results in $$\step \lt 2 (1 + \mom) / L$$. Combining this condition with the one of not being in the robust or lazy region gives the characterization: -\begin{equation} -\step \lt \frac{2 (1 + \mom)}{L} \quad \text{ and } \quad \step \geq \max\Big\\{\tfrac{2(1 + \mom)}{L + \lmin}, \tfrac{(1 + \sqrt{\mom})^2}{L}\Big\\}\,. -\end{equation} - - -{% include figure.html path="assets/img/2023-05-01-hitchhikers-momentum/sketch_knife_edge.png" class="img-fluid" %} - - -### Asymptotic rate - -The asymptotic rate can be computed using the same technique as in the lazy region. The resulting rate is the same as in that region but with $$\sigma(L)$$ replacing $$\sigma(\lmin)$$: - - -- In the Knife's edge region the asymptotic rate is \(\sqrt{\mom}\left(|\sigma(L)| + \sqrt{\sigma(L)^2 - 1} \right)\). -
- -Pictorially, this corresponds to - -{% include figure.html path="assets/img/2023-05-01-hitchhikers-momentum/rate_knife_edge.png" class="img-fluid" %} - - -## Putting it All Together - -This is the end of our journey. We've visited all the regions on which momentum converges.The asymptotic rate \(\limsup_{t \to \infty} \sqrt[t]{r_t}\) of momentum is -\begin{alignat}{2} - &\sqrt{\mom} &&\text{ if }\step \in \big[\frac{(1 - \sqrt{\mom})^2}{\lmin}, \frac{(1+\sqrt{\mom})^2}{L}\big]\\ -&\sqrt{\mom}(|\sigma(\lmin)| + \sqrt{\sigma(\lmin)^2 - 1}) &&\text{ if } \step \in \big[0, \min\{\tfrac{2(1 + \mom)}{L + \lmin}, \tfrac{(1 - \sqrt{\mom})^2}{\lmin}\}\big]\\ -&\sqrt{\mom}(|\sigma(L)| + \sqrt{\sigma(L)^2 - 1})&&\text{ if } \step \in \big[\max\big\{\tfrac{2(1 + \mom)}{L + \lmin}, \tfrac{(1 + \sqrt{\mom})^2}{L}\big\}, \tfrac{2 (1 + \mom) }{L} \big)\\ -&\geq 1 \text{ (divergence)} && \text{ otherwise.} -\end{alignat} -
- -Plotting the asymptotic rates for all regions we can see that Polyak momentum (the method with momentum $\mom = \left(\frac{\sqrt{L} - \sqrt{\lmin}}{\sqrt{L} + \sqrt{\lmin}}\right)^2$ and step-size $\step = \left(\frac{2}{\sqrt{L} + \sqrt{\lmin}}\right)^2$ which is asymptotically optimal among the momentum methods with constant coefficients) is at the intersection of the three regions. - - - -{% include figure.html path="assets/img/2023-05-01-hitchhikers-momentum/rate_convergence_momentum.png" class="img-fluid" %} - - - -## Reproducibility - -All plots in this post were generated using the following Jupyer notebook: [[HTML]]({{'assets/html/2023-05-01-hitchhikers-momentum/hitchhikers-momentum.html' | relative_url}}) [[IPYNB]]({{'assets/html/2023-05-01-hitchhikers-momentum/hitchhikers-momentum.ipynb' | relative_url}}) diff --git a/_posts/2023-05-01-how-does-the-inductive-bias-influence-the-generalization-capability-of-neural-networks.md b/_posts/2023-05-01-how-does-the-inductive-bias-influence-the-generalization-capability-of-neural-networks.md deleted file mode 100644 index ec56e41e..00000000 --- a/_posts/2023-05-01-how-does-the-inductive-bias-influence-the-generalization-capability-of-neural-networks.md +++ /dev/null @@ -1,166 +0,0 @@ ---- -layout: distill -title: How does the inductive bias influence the generalization capability of neural networks? -description: [The blog post discusses how memorization and generalization are affected by extreme overparameterization. Therefore, it explains the overfitting puzzle in machine learning and how the inductive bias can help to understand the generalization capability of neural networks.] -date: 2023-05-01 -htmlwidgets: true - -# anonymize when submitting -# authors: -# - name: Anonymous - -# do not fill this in until your post is accepted and you're publishing your camera-ready post! 
-authors: - - name: Charlotte Barth - url: "https://www.linkedin.com/in/charlotte-barth-a58b0a152/?originalSubdomain=de" - affiliations: - name: TU Berlin - - name: Thomas Goerttler - url: "https://scholar.google.de/citations?user=ppQIwpIAAAAJ&hl=de" - affiliations: - name: TU Berlin - - name: Klaus Obermayer - url: "https://www.tu.berlin/ni/" - affiliations: - name: TU Berlin - -# must be the exact same name as your blogpost -bibliography: 2023-05-01-how-does-the-inductive-bias-influence-the-generalization-capability-of-neural-networks.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Overfitting Puzzle - - name: Experiments - subsections: - - name: Fully connected networks (FCN) - - name: Convolutional neural networks (CNN) - - name: General findings - - name: Conclusion ---- - -Deep neural networks are a commonly used machine learning technique that has proven to be effective for many different use cases. However, their ability to generalize from training data is not well understood. In this blog post, we will explore the paper "Identity Crisis: Memorization and Generalization under Extreme Overparameterization" by Zhang et al. [2020]- In many real-world classification datasets, the number of examples for each class varies. Class-imbalanced classification refers to classification on datasets where the frequencies of class labels vary significantly. -
-
- It is generally more difficult for a neural network to learn to classify classes with fewer examples.
- Transformations are alterations of data. In the context of image classification, nuisance transformations are alterations that do not affect the class labels of the data. A model is said to be invariant to a nuisance transformation if it can successfully ignore the transformation when predicting a class label. -
- We can formally define a nuisance transformation -- $$T(\cdot |x)$$ -
-- as a distribution over transformation functions. An example of a nuisance transformation might be a distribution over rotation matrices of different angles, or lighting transformations with different exposure values. By definition, nuisance transformations have no impact on class labels $y$, only on data $x$. A perfectly transformation-invariant classifier would thus completely ignore them, i.e., -
-- $$ - \hat{P}_w(y = j|x) = \hat{P}_w(y = j|x'), \; x' \sim T(\cdot |x). - $$ -
-
- (see Zhou et al.
-$$ -\underset{\omega}{\mathrm{min}} \; \mathbb{E}_{\mathcal{T} \sim p(\mathcal{T})} \; \mathcal{L}(\mathcal{D}, \omega), -$$ -
- -where $ \omega $ is parameters trained exclusively on the meta-level, i.e., the *meta-knowledge* learnable from the task distribution-$$ -\bbox[5pt, border: 2px solid blue]{ -\begin{align*} - \omega^{*} = \underset{\omega}{\mathrm{argmin}} \sum_{i=1}^{M} \mathcal{L}^{meta}(\theta^{* \; (i)}(\omega), D^{val}_i), -\end{align*} -} -$$ -
- - -where $M$ describes the number of tasks in a batch, $\mathcal{L}^{meta}$ is the meta-loss function, and $ D^{val}_i $ is the validation set of the task $ i $. $\omega$ represents the parameters exclusively updated in the outer loop. $ \theta^{* \; (i)} $ represents an inner loop learning a task that we can formally express as a sub-objective constraining the primary objective - --$$ -\bbox[5pt, border: 2px solid red]{ -\begin{align*} - s.t. \; \theta^{* \; (i)} = \underset{\theta}{\mathrm{argmin}} \; \mathcal{L^{task}}(\theta, \omega, D^{tr}_i), -\end{align*} -} -$$ -
- -where $ \theta $ are the model parameters updated in the inner loop, $ \mathcal{L}^{task} $ is the loss function by which they are updated and $ D^{tr}_i $ is the training set of the task $ i $-$$ -\begin{align*} - &\mathcal{L}^{x_{2}}_{GAN}(\theta_d, \theta_c, \theta_s) - \\\\ - =& \;\mathbb{E}_{c_{1} \sim p(c_{1}), s_{2} \sim p(s_{2})} \left[ \log (1 -D_ {2} (G_{2} (c_{1}, s_{2}, \theta_c, \theta_s), \theta_d)) \right] - \\ - +& \;\mathbb{E}_{x_{2} \sim p(x_{2})} \left[ \log(D_{2} (x_{2}, \theta_d)) \right], -\end{align*} -$$ -
- -where the $ \theta_d $ represents the parameters of the discriminator network, $p(x_2)$ is the data of the second domain, $ c_1 $ is the content embedding of an image from the first domain to be translated. $ s_2 $ is a random style code of the second domain. $ D_2 $ is the discriminator of the second domain, and $ G_2 $ is its generator. MUNIT's full objective function is: - --$$ -\begin{align*} - \underset{\theta_c, \theta_s}{\mathrm{argmin}} \; \underset{\theta_d}{\mathrm{argmax}}& \;\mathbb{E}_{c_{1} \sim p(c_{1}), s_{2} \sim p(s_{2})} \left[ \log (1 -D_ {2} (G_{2} (c_{1}, s_{2}, \theta_c, \theta_s), \theta_d)) \right] - \\ +& \; \mathbb{E}_{x_{2} \sim p(x_{2})} \left[ \log(D_{2} (x_{2}, \theta_d)) \right], + \; \mathcal{L}^{x_{1}}_{GAN}(\theta_d, \theta_c, \theta_s) - \\ +& \;\mathcal{L}_{recon}(\theta_c, \theta_s) -\end{align*} -$$ -
- -(compare-$$ -\bbox[5px, border: 2px solid blue]{ -\begin{align*} - \omega^{*} - & = \{ \theta_c^*, \theta_s^* \} - \\\\ - & = - \underset{\theta_c, \theta_s}{\mathrm{argmin}} \; \mathbb{E}_{c_{1} \sim p(c_{1}), s_{2} \sim p(s_{2})} \left[ \log (1 -D_ {2} (G_{2} (c_{1}, s_{2}, \theta_c, \theta_s), \theta_d^{*})) \right] - \\ - & + \mathcal{L}_{recon}(\theta_c, \theta_s), -\end{align*} -} -$$ -
- -We then add a single constraint, a subsidiary maximization problem for the discriminator function: - --$$ -\bbox[5px, border: 2px solid red]{ -\begin{align*} - &s.t. \;\theta_d^{*} - \\\\ - & = - \underset{\theta_d}{\mathrm{argmax}} \; \mathbb{E}_{c_{1} \sim p(c_{1}), s_{2} \sim p(s_{2})} \left[ \log (1 -D_ {2} (G_{2} (c_{1}, s_{2}, \theta_c, \theta_s), \theta_d)) \right] - \\ - & + \mathbb{E}_{x_{2} \sim p(x_{2})} \left[ \log(D_{2} (x_{2}, \theta_d)) \right] -\end{align*} -} -$$ -
- -Interestingly, this bi-level view does not only resemble a meta-learning procedure as expressed above, but the bi-level optimization also facilitates a similar effect. Maximizing the discriminator's performance in the constraint punishes style information encoded as content information. If style information is encoded as content information, the discriminator detects artifacts of the original domain in the translated image. Similarly, a meta-learner prevents *meta-overfitting* via an outer optimization loop. - -*However, MUNIT, while representable as a bi-level optimization problem does not "essentially boil down to nesting two search problems".atoi
and then adds 2.
-
-```python
-def atoi(seq=tokens):
- return seq.map(lambda x: ord(x) - ord('0'))
-
-op = (atoi(where(tokens == "-", "0", tokens)) + 2)
-op.input("02-13")
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_37_0.svg)
-
-
-
-
-From here on, unless we use a different input sequence, we will assume that the input is ‘hello’ and omit the input display in the illustrations.
-
-
-### Attention Selectors
-
-Things get more interesting when we start to apply attention. This allows routing of information between the different elements of the sequence.
-
-
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_39_0.svg)
-
-
-
-
-We begin by defining notation for the keys and queries of the model. Keys and queries are effectively transforms that we will broadcast and compare to each other to create *selectors*, our parallel to attention patterns. We create them directly from transforms. For example, if we want to define a key, we call `key` on a transform.
-
-```python
-key(tokens)
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_41_0.svg)
-
-
-
-
-Similarly for `query`. (Queries are presented as columns to reflect their relation to the selectors we will create from them.)
-
-```python
-query(tokens)
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_43_0.svg)
-
-
-
-
-Scalars can be used as keys or queries. They broadcast out to the length of the underlying sequence.
-
-```python
-query(1)
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_45_0.svg)
-
-
-
-
-By applying a comparison operation between a key and a query we create a *selector*, our parallel to an attention matrix - though this one is unweighted.
-
-A selector is a binary matrix indicating which input position (column) each output position (row) will attend to in an eventual attention computation. In the comparison creating it, the key values describe the input (column) positions, and the query values describe the output (row) positions.
-
-```python
-eq = (key(tokens) == query(tokens))
-eq
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_47_0.svg)
-
-
-
-
-Some examples:
-
-* A selector that matches each output position to the previous input position.
-
-```python
-offset = (key(indices) == query(indices - 1))
-offset
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_49_0.svg)
-
-
-
-
-* A selector that matches each output position to all earlier input positions.
-
-```python
-before = key(indices) < query(indices)
-before
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_51_0.svg)
-
-
-
-
-* A selector that matches each output position to all later input positions.
-
-```python
-after = key(indices) > query(indices)
-after
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_53_0.svg)
-
-
-
-
-Selectors can be merged using boolean operations. For example, this selector focuses each output position on 1) earlier positions that 2) contain the same original input token as its own. We show this by including both pairs of keys and queries in the matrix.
-
-```python
-before & eq
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_55_0.svg)
-
-
-
-
-### Using Attention
-
-Given an attention selector we can provide a value sequence to aggregate. We represent aggregation by **summing** up over the values that have a true value for their selector.
-
-(Note: in the original paper, they use a **mean** aggregation and show a clever construction where mean aggregation is able to represent a sum calculation. RASPy uses sum by default for simplicity and to avoid fractions. In practice this means that RASPy may underestimate the number of layers needed to convert to a mean based model by a factor of 2.)
-
-Attention aggregation gives us the ability to compute functions like histograms.
-
-```python
-(key(tokens) == query(tokens)).value(1)
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_59_0.svg)
-
-
-
-
-Visually we follow the architecture diagram. Queries are to the left, Keys at the top, Values at the bottom, and the Output is to the right.
-
-
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_61_0.svg)
-
-
-
-
-Some attention operations may not even use the input tokens. For instance to compute the `length` of a sequence, we create a “select all” attention selector and then sum a value of 1 from each position.
-
-```python
-length = (key(1) == query(1)).value(1)
-length = length.name("length")
-length
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_63_0.svg)
-
-
-
-
-Here's a more complex example, shown step-by-step. (This is the kind of thing they ask in interviews!)
-
-Say we want to compute the sum of neighboring values in a sequence, along a sliding window. First we apply the forward cutoff, attending only to positions that are not too far in the past.
-
-```python
-WINDOW=3
-s1 = (key(indices) >= query(indices - WINDOW + 1))
-s1
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_65_0.svg)
-
-
-
-
-Then the backward cutoff, attending only to positions up to and including our own.
-
-```python
-s2 = (key(indices) <= query(indices))
-s2
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_67_0.svg)
-
-
-
-
-Intersect.
-
-```python
-sel = s1 & s2
-sel
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_69_0.svg)
-
-
-
-
-And finally aggregate.
-
-```python
-sum2 = sel.value(tokens)
-sum2.input([1,3,2,2,2])
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_71_0.svg)
-
-
-
-
-Here is a simple example that produces a 2-layer transform. The first corresponds to computing length and the second the cumulative sum. The cumulative sum has to go into a second layer because it is applied to a transform which uses length, and so it can only be computed after the computation of length is complete.
-
-```python
-def cumsum(seq=tokens):
- x = (before | (key(indices) == query(indices))).value(seq)
- return x.name("cumsum")
-cumsum().input([3, 1, -2, 3, 1])
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_73_0.svg)
-
-
-
-
-### Layers
-
-The language supports building up more complex transforms. It keeps track of the *layers* by tracking the operations computed so far.
-
-
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_76_0.svg)
-
-
-
-
-Here is a simple example that produces a 2-layer transform. The first corresponds to computing length and the second the cumulative sum.
-
-```python
-x = cumsum(length - indices)
-x.input([3, 2, 3, 5])
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_78_0.svg)
-
-
-
-
-## Coding with Transformers
-
-Given this library of functions, we can write operations to accomplish surprisingly complex tasks.
-
-**Can we produce a Transformer that does basic addition of two arbitrary length numbers?**
-
-i.e. given a string "19492+23919" can we produce the correct output?
-
-We will go through these steps, and their solutions, here. If you would rather do them on your own, we provide a version where you can try them yourself!
-
-Before we dive in to the main task, we will do some challenges of increasing difficulty to help us build some intuitions.
-
-
-### Challenge 1: Select a given index
-
-Produce a sequence where all the elements have the value at index i.
-
-```python
-def index(i, seq=tokens):
- x = (key(indices) == query(i)).value(seq)
- return x.name("index")
-index(1)
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_83_0.svg)
-
-
-
-
-### Challenge 2: Shift
-
-Shift all of the tokens in a sequence to the right by i positions. (Here we introduce an optional parameter in the aggregation: the default value to be used when no input positions are selected. If not defined, this value is 0.)
-
-```python
-def shift(i=1, default="_", seq=tokens):
- x = (key(indices) == query(indices-i)).value(seq, default)
- return x.name("shift")
-shift(2)
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_85_0.svg)
-
-
-
-
-### Challenge 3: Minimum
-
-Compute the minimum values of the sequence. (This one starts to get harder. Our version uses 2 layers of attention.)
-
-```python
-def minimum(seq=tokens):
- sel1 = before & (key(seq) == query(seq))
- sel2 = key(seq) < query(seq)
- less = (sel1 | sel2).value(1)
- x = (key(less) == query(0)).value(seq)
- return x.name("min")
-minimum()([5,3,2,5,2])
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_87_0.svg)
-
-
-
-
-The idea behind our solution is an implicit full ordering of the input positions: we (implicitly) order the positions according to input token value, with input position as tie breaker. Our first act is to have each position attend to all positions before it in the ordering: `sel1` focuses on earlier input positions with the same input token value, and `sel2` focuses on input positions with lower input token value. We then aggregate a 1 from all positions to get where each position is located in this ordering (i.e., how many other positions precede it). The minimum value is the input value at the first position according to this ordering (i.e., the one which had no other positions precede it).
-
-### Challenge 4: First Index
-
-Compute the first index that has token q, assuming the sequence always has length shorter than 100. (2 layers)
-
-```python
-def first(q, seq=tokens):
- return minimum(where(seq == q, indices, 99))
-first("l")
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_90_0.svg)
-
-
-
-
-### Challenge 5: Right Align
-
-Right align a padded sequence e.g. ralign().inputs('xyz___') = '---xyz'" (2 layers)
-
-```python
-def ralign(default="-", sop=tokens):
- c = (key(sop) == query("_")).value(1)
- x = (key(indices + c) == query(indices)).value(sop, default)
- return x.name("ralign")
-ralign()("xyz__")
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_92_0.svg)
-
-
-
-
-### Challenge 6: Split
-
-Split a sequence into two parts at value v and then right align. You can assume there is exactly one appearance of v in the sequence. (3 layers to get and align the first part of the sequence, but only 1 for the second.)
-
-```python
-def split(v, get_first_part, sop=tokens, default="0"):
- split_point = (key(sop) == query(v)).value(indices)
- if get_first_part:
- x = ralign(default,
- where(indices < split_point,
- sop, "_"))
- return x
- else:
- x = where(indices > split_point, sop, default)
- return x
-split("+", False)("xyz+zyr")
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_94_0.svg)
-
-
-
-
-```python
-split("+", 0)("xyz+zyr")
-```
-
-
-
-
-![svg]({{site.baseurl}}/assets/img/2023-05-01-raspy/Blog_95_0.svg)
-
-
-
-
-### Challenge 6: Slide
-
-Replace special tokens "<" with the closest non "<" value to their right. (2 layers)
-
-```python
-def slide(match, seq=tokens):
- x = cumsum(match)
- y = ((key(x) == query(x + 1)) & (key(match) == query(True))).value(seq)
- seq = where(match, seq, y)
- return seq.name("slide")
-slide(tokens != "<").input("xxxh<<Notation | -Description | -Notation | -Description | -
---|---|---|---|
$s$ | -the current state (at time $t$) | -$S$ | -the set of all states | -
$s^{\prime}$ | -the next state (at time $t+1$) | -$U$ | -the set of all actions | -
$u^{i}$ | -the action of agent $i$ | -$N$ | -the set of all agents | -
$\mathbf{u}$ | -the joint actions (at time $t$) | -$\tau^{i}$ | -the action-observation history of agent $i$ | -
$o^{i}$ | -the observation of agent $i$ | -$${\tau}$$ | -the joint action-observation histories | -
$$o$$ | -the joint observation | -$r(s, \mathbf{u})$ | -the joint reward supplied by environments | -
$Q_{i}(\tau^{i}, u^{i})$ | -the utility function of agent $i$ | -$\gamma$ | -the discount factor | -
$Q_{tot}({\tau}, \mathbf{u})$ | -the joint value function | -$P(s^{\prime} \mid s, \mathbf{u})$ | -the transition function | -
$Z(o^{i} \mid s, u^{i})$ | -the observation function | -$\epsilon$ | -action selection probability of $\epsilon$-greedy | -
$N$ | -the set of all agents with $n$ agents | -$$\theta$$ | -the set of parameters of agents network, with $[\theta^{i}]_{i=1}^{n}$ | -
$b$ | -sampled batch size for training | -$\phi$ | -the parameter of mixing network | -
$TS$ | -the $T$otal rollout $S$amples | -$PP$ | -the number of rollout $P$rocesses in $P$arallel | -
$SE$ | -the number of $S$amples in each $E$pisode |
- $PI$ | -the $P$olicy $I$teration number | -
Senarios | -Difficulty | -QMIX | -Finetuned-QMIX | -
10m_vs_11m | -Easy | -98% | -100% | -
8m_vs_9m | -Hard | -84% | -100% | -
5m_vs_6m | -Hard | -84% | -90% | -
3s_vs_5z | -Hard | -96% | -100% | -
bane_vs_bane | -Hard | -100% | -100% | -
2c_vs_64zg | -Hard | -100% | -100% | -
corridor | -Super hard | -0% | -100% | -
MMM2 | -Super hard | -98% | -100% | -
3s5z_vs_3s6z | -Super hard | -3% | -93% (Hidden Size = 256) | -
27m_vs_3s6z | -Super hard | -56% | -100% | -
6h_vs_8z | -Super hard | -0% | -93% (λ = 0.3) | -
Announcements:
The Machine Learning community is currently experiencing a reproducibility crisis and a reviewing crisis [Littman, 2021]. Because of the highly competitive and noisy reviewing process of ML conferences [Tran et al., 2020], researchers have an incentive to oversell their results, slowing down the progress and diminishing the integrity of the scientific community. Moreover with the growing number of papers published and submitted at the main ML conferences [Lin et al., 2020], it has become more challenging to keep track of the latest advances in the field.
Blog posts are becoming an increasingly popular and useful way to talk about science [Brown and Woolston, 2018]. They offer substantial value to the scientific community by providing a flexible platform to foster open, human, and transparent discussions about new insights or limitations of a scientific publication. However, because they are not as recognized as standard scientific publications, only a minority of researchers manage to maintain an active blog and get visibility for their efforts. Many are well-established researchers (Francis Bach, Ben Recht, Ferenc Huszár, Lilian Weng) or big corporations that leverage entire teams of graphic designers and writers to polish their blogs (Facebook AI, Google AI, DeepMind, OpenAI). As a result, the incentives for writing scientific blog posts are largely personal; it is unreasonable to expect a significant portion of the machine learning community to contribute to such an initiative when everyone is trying to establish themselves through publications.
Last year, we ran the first iteration of the Blogpost track at ICLR 2022! It was very successful, attracting over 60 submissions and 20 accepted posts.
Our goal is to create a formal call for blog posts at ICLR to incentivize and reward researchers to review past work and summarize the outcomes, develop new intuitions, or highlight some shortcomings. A very influential initiative of this kind happened after the second world war in France. Because of the lack of up-to-date textbooks, a collective of mathematicians under the pseudonym Nicolas Bourbaki [Halmos 1957], decided to start a series of textbooks about the foundations of mathematics [Bourbaki, 1939]. In the same vein, we aim at providing a new way to summarize scientific knowledge in the ML community.
Due to the large diversity of topics that can be discussed in a blog post, we decided to restrict the range of topics for this call for blog posts. We identified that the blog posts that would bring to most value to the community and the conference would be posts that distill and discuss previously published papers.
Abstract deadline: February 2nd AOE, 2023 (submit to OpenReview).
Submission deadline: February 10th AOE, 2023 (any modifications to your blog post, via a pull request on github).
Notification of acceptance: March 31st, 2023
Camera-ready merge: April 28th, 2023 (please follow the instructions here)
The format and process for this blog post track is as follows:
Blog Posts must not be used to highlight or advertise past publications of the authors or their lab. Previously, we did not accept submissions with a conflict of interest; however, this year we will only ask the authors to report if they have such a conflict. If so, reviewers will be asked to judge if the submission is sufficiently critical and objective of the papers addressed in the blog post.
The posts will be created and published under a unified template; see the submission instructions and the sample post hosted on the blog of this website.
Our goal is to avoid heavily engineered, professionally-made blog posts—such as the “100+ hours” mentioned as a standard by the Distill guidelines—to encourage ideas and clear writing rather than dynamic visualizations or embedded javascript engines.
As a result, we restrict submissions to the Markdown format. We believe this is a good trade-off between complexity and flexibility. Markdown enables users to easily embed media such as images, gifs, audio, and video as well as write mathematical equations using MathJax, without requiring users to know how to create HTML web pages. This (mostly) static format is also fairly portable; users can download the blog post without much effort for offline reading or archival purposes. More importantly, this format can be easily hosted and maintained through GitHub.
Eryn Brown and Chris Woolston. Why science blogging still matters. Nature, 2018.
Paul R Halmos. Nicolas Bourbaki. Scientific American, 1957.
Nicolas Bourbaki. Elements of mathematics. Éditions Hermann, 1939.
tag. We found the following text: ' + text); - const wrapper = document.createElement('span'); - wrapper.innerHTML = addedNode.nodeValue; - addedNode.parentNode.insertBefore(wrapper, addedNode); - addedNode.parentNode.removeChild(addedNode); - } - } break; - } - } - } - }).observe(this, {childList: true}); - } - - } - - var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {}; - - function createCommonjsModule(fn, module) { - return module = { exports: {} }, fn(module, module.exports), module.exports; - } - - var bibtexParse = createCommonjsModule(function (module, exports) { - /* start bibtexParse 0.0.22 */ - - //Original work by Henrik Muehe (c) 2010 - // - //CommonJS port by Mikola Lysenko 2013 - // - //Port to Browser lib by ORCID / RCPETERS - // - //Issues: - //no comment handling within strings - //no string concatenation - //no variable values yet - //Grammar implemented here: - //bibtex -> (string | preamble | comment | entry)*; - //string -> '@STRING' '{' key_equals_value '}'; - //preamble -> '@PREAMBLE' '{' value '}'; - //comment -> '@COMMENT' '{' value '}'; - //entry -> '@' key '{' key ',' key_value_list '}'; - //key_value_list -> key_equals_value (',' key_equals_value)*; - //key_equals_value -> key '=' value; - //value -> value_quotes | value_braces | key; - //value_quotes -> '"' .*? '"'; // not quite - //value_braces -> '{' .*? 
'"'; // not quite - (function(exports) { - - function BibtexParser() { - - this.months = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]; - this.notKey = [',','{','}',' ','=']; - this.pos = 0; - this.input = ""; - this.entries = new Array(); - - this.currentEntry = ""; - - this.setInput = function(t) { - this.input = t; - }; - - this.getEntries = function() { - return this.entries; - }; - - this.isWhitespace = function(s) { - return (s == ' ' || s == '\r' || s == '\t' || s == '\n'); - }; - - this.match = function(s, canCommentOut) { - if (canCommentOut == undefined || canCommentOut == null) - canCommentOut = true; - this.skipWhitespace(canCommentOut); - if (this.input.substring(this.pos, this.pos + s.length) == s) { - this.pos += s.length; - } else { - throw "Token mismatch, expected " + s + ", found " - + this.input.substring(this.pos); - } this.skipWhitespace(canCommentOut); - }; - - this.tryMatch = function(s, canCommentOut) { - if (canCommentOut == undefined || canCommentOut == null) - canCommentOut = true; - this.skipWhitespace(canCommentOut); - if (this.input.substring(this.pos, this.pos + s.length) == s) { - return true; - } else { - return false; - } }; - - /* when search for a match all text can be ignored, not just white space */ - this.matchAt = function() { - while (this.input.length > this.pos && this.input[this.pos] != '@') { - this.pos++; - } - if (this.input[this.pos] == '@') { - return true; - } return false; - }; - - this.skipWhitespace = function(canCommentOut) { - while (this.isWhitespace(this.input[this.pos])) { - this.pos++; - } if (this.input[this.pos] == "%" && canCommentOut == true) { - while (this.input[this.pos] != "\n") { - this.pos++; - } this.skipWhitespace(canCommentOut); - } }; - - this.value_braces = function() { - var bracecount = 0; - this.match("{", false); - var start = this.pos; - var escaped = false; - while (true) { - if (!escaped) { - if (this.input[this.pos] == '}') { - if (bracecount > 
0) { - bracecount--; - } else { - var end = this.pos; - this.match("}", false); - return this.input.substring(start, end); - } } else if (this.input[this.pos] == '{') { - bracecount++; - } else if (this.pos >= this.input.length - 1) { - throw "Unterminated value"; - } } if (this.input[this.pos] == '\\' && escaped == false) - escaped = true; - else - escaped = false; - this.pos++; - } }; - - this.value_comment = function() { - var str = ''; - var brcktCnt = 0; - while (!(this.tryMatch("}", false) && brcktCnt == 0)) { - str = str + this.input[this.pos]; - if (this.input[this.pos] == '{') - brcktCnt++; - if (this.input[this.pos] == '}') - brcktCnt--; - if (this.pos >= this.input.length - 1) { - throw "Unterminated value:" + this.input.substring(start); - } this.pos++; - } return str; - }; - - this.value_quotes = function() { - this.match('"', false); - var start = this.pos; - var escaped = false; - while (true) { - if (!escaped) { - if (this.input[this.pos] == '"') { - var end = this.pos; - this.match('"', false); - return this.input.substring(start, end); - } else if (this.pos >= this.input.length - 1) { - throw "Unterminated value:" + this.input.substring(start); - } } - if (this.input[this.pos] == '\\' && escaped == false) - escaped = true; - else - escaped = false; - this.pos++; - } }; - - this.single_value = function() { - var start = this.pos; - if (this.tryMatch("{")) { - return this.value_braces(); - } else if (this.tryMatch('"')) { - return this.value_quotes(); - } else { - var k = this.key(); - if (k.match("^[0-9]+$")) - return k; - else if (this.months.indexOf(k.toLowerCase()) >= 0) - return k.toLowerCase(); - else - throw "Value expected:" + this.input.substring(start) + ' for key: ' + k; - - } }; - - this.value = function() { - var values = []; - values.push(this.single_value()); - while (this.tryMatch("#")) { - this.match("#"); - values.push(this.single_value()); - } return values.join(""); - }; - - this.key = function() { - var start = this.pos; - while 
(true) { - if (this.pos >= this.input.length) { - throw "Runaway key"; - } // а-яА-Я is Cyrillic - //console.log(this.input[this.pos]); - if (this.notKey.indexOf(this.input[this.pos]) >= 0) { - return this.input.substring(start, this.pos); - } else { - this.pos++; - - } } }; - - this.key_equals_value = function() { - var key = this.key(); - if (this.tryMatch("=")) { - this.match("="); - var val = this.value(); - return [ key, val ]; - } else { - throw "... = value expected, equals sign missing:" - + this.input.substring(this.pos); - } }; - - this.key_value_list = function() { - var kv = this.key_equals_value(); - this.currentEntry['entryTags'] = {}; - this.currentEntry['entryTags'][kv[0]] = kv[1]; - while (this.tryMatch(",")) { - this.match(","); - // fixes problems with commas at the end of a list - if (this.tryMatch("}")) { - break; - } - kv = this.key_equals_value(); - this.currentEntry['entryTags'][kv[0]] = kv[1]; - } }; - - this.entry_body = function(d) { - this.currentEntry = {}; - this.currentEntry['citationKey'] = this.key(); - this.currentEntry['entryType'] = d.substring(1); - this.match(","); - this.key_value_list(); - this.entries.push(this.currentEntry); - }; - - this.directive = function() { - this.match("@"); - return "@" + this.key(); - }; - - this.preamble = function() { - this.currentEntry = {}; - this.currentEntry['entryType'] = 'PREAMBLE'; - this.currentEntry['entry'] = this.value_comment(); - this.entries.push(this.currentEntry); - }; - - this.comment = function() { - this.currentEntry = {}; - this.currentEntry['entryType'] = 'COMMENT'; - this.currentEntry['entry'] = this.value_comment(); - this.entries.push(this.currentEntry); - }; - - this.entry = function(d) { - this.entry_body(d); - }; - - this.bibtex = function() { - while (this.matchAt()) { - var d = this.directive(); - this.match("{"); - if (d == "@STRING") { - this.string(); - } else if (d == "@PREAMBLE") { - this.preamble(); - } else if (d == "@COMMENT") { - this.comment(); - } else { - 
this.entry(d); - } - this.match("}"); - } }; - } - exports.toJSON = function(bibtex) { - var b = new BibtexParser(); - b.setInput(bibtex); - b.bibtex(); - return b.entries; - }; - - /* added during hackathon don't hate on me */ - exports.toBibtex = function(json) { - var out = ''; - for ( var i in json) { - out += "@" + json[i].entryType; - out += '{'; - if (json[i].citationKey) - out += json[i].citationKey + ', '; - if (json[i].entry) - out += json[i].entry ; - if (json[i].entryTags) { - var tags = ''; - for (var jdx in json[i].entryTags) { - if (tags.length != 0) - tags += ', '; - tags += jdx + '= {' + json[i].entryTags[jdx] + '}'; - } - out += tags; - } - out += '}\n\n'; - } - return out; - - }; - - })( exports); - - /* end bibtexParse */ - }); - - // Copyright 2018 The Distill Template Authors - - function normalizeTag(string) { - return string - .replace(/[\t\n ]+/g, ' ') - .replace(/{\\["^`.'acu~Hvs]( )?([a-zA-Z])}/g, (full, x, char) => char) - .replace(/{\\([a-zA-Z])}/g, (full, char) => char); - } - - function parseBibtex(bibtex) { - const bibliography = new Map(); - const parsedEntries = bibtexParse.toJSON(bibtex); - for (const entry of parsedEntries) { - // normalize tags; note entryTags is an object, not Map - for (const [key, value] of Object.entries(entry.entryTags)) { - entry.entryTags[key.toLowerCase()] = normalizeTag(value); - } - entry.entryTags.type = entry.entryType; - // add to bibliography - bibliography.set(entry.citationKey, entry.entryTags); - } - return bibliography; - } - - function serializeFrontmatterToBibtex(frontMatter) { - return `@article{${frontMatter.slug}, - author = {${frontMatter.bibtexAuthors}}, - title = {${frontMatter.title}}, - journal = {${frontMatter.journal.title}}, - year = {${frontMatter.publishedYear}}, - note = {${frontMatter.url}}, - doi = {${frontMatter.doi}} -}`; - } - - // Copyright 2018 The Distill Template Authors - - class Bibliography extends HTMLElement { - - static get is() { return 'd-bibliography'; } - - 
constructor() { - super(); - - // set up mutation observer - const options = {childList: true, characterData: true, subtree: true}; - const observer = new MutationObserver( (entries) => { - for (const entry of entries) { - if (entry.target.nodeName === 'SCRIPT' || entry.type === 'characterData') { - this.parseIfPossible(); - } - } - }); - observer.observe(this, options); - } - - connectedCallback() { - requestAnimationFrame(() => { - this.parseIfPossible(); - }); - } - - parseIfPossible() { - const scriptTag = this.querySelector('script'); - if (!scriptTag) return; - if (scriptTag.type == 'text/bibtex') { - const newBibtex = scriptTag.textContent; - if (this.bibtex !== newBibtex) { - this.bibtex = newBibtex; - const bibliography = parseBibtex(this.bibtex); - this.notify(bibliography); - } - } else if (scriptTag.type == 'text/json') { - const bibliography = new Map(JSON.parse(scriptTag.textContent)); - this.notify(bibliography); - } else { - console.warn('Unsupported bibliography script tag type: ' + scriptTag.type); - } - } - - notify(bibliography) { - const options = { detail: bibliography, bubbles: true }; - const event = new CustomEvent('onBibliographyChanged', options); - this.dispatchEvent(event); - } - - /* observe 'src' attribute */ - - static get observedAttributes() { - return ['src']; - } - - receivedBibtex(event) { - const bibliography = parseBibtex(event.target.response); - this.notify(bibliography); - } - - attributeChangedCallback(name, oldValue, newValue) { - var oReq = new XMLHttpRequest(); - oReq.onload = (e) => this.receivedBibtex(e); - oReq.onerror = () => console.warn(`Could not load Bibtex! (tried ${newValue})`); - oReq.responseType = 'text'; - oReq.open('GET', newValue, true); - oReq.send(); - } - - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. 
- // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - // import style from '../styles/d-byline.css'; - - function bylineTemplate(frontMatter) { - return ` -
-`; - } - - class Byline extends HTMLElement { - - static get is() { return 'd-byline'; } - - set frontMatter(frontMatter) { - this.innerHTML = bylineTemplate(frontMatter); - } - - } - - // Copyright 2018 The Distill Template Authors - - const T$3 = Template( - "d-cite", - ` - - -
-
-`);
-
- class Code extends Mutating(T$4(HTMLElement)) {
-
- renderContent() {
-
- // check if language can be highlighted
- this.languageName = this.getAttribute('language');
- if (!this.languageName) {
- console.warn('You need to provide a language attribute to your `; - if (frontMatter.githubCompareUpdatesUrl) { - html += `View all changes to this article since it was first published.`; - } - html += ` - If you see mistakes or want to suggest changes, please create an issue on GitHub.
- `; - } - - const journal = frontMatter.journal; - if (typeof journal !== 'undefined' && journal.title === 'Distill') { - html += ` -Diagrams and text are licensed under Creative Commons Attribution CC-BY 4.0 with the source available on GitHub, unless noted otherwise. The figures that have been reused from other sources don’t fall under this license and can be recognized by a note in their caption: “Figure from …”.
- `; - } - - if (typeof frontMatter.publishedDate !== 'undefined') { - html += ` -For attribution in academic contexts, please cite this work as
-${frontMatter.concatenatedAuthors}, "${frontMatter.title}", Distill, ${frontMatter.publishedYear}.-
BibTeX citation
-${serializeFrontmatterToBibtex(frontMatter)}- `; - } - - return html; - } - - class DistillAppendix extends HTMLElement { - - static get is() { return 'distill-appendix'; } - - set frontMatter(frontMatter) { - this.innerHTML = appendixTemplate(frontMatter); - } - - } - - const footerTemplate = ` - - - - -`; - - // Copyright 2018 The Distill Template Authors - - const T$c = Template('distill-footer', footerTemplate); - - class DistillFooter extends T$c(HTMLElement) { - - } - - // Copyright 2018 The Distill Template Authors - - let templateIsLoading = false; - let runlevel = 0; - const initialize = function() { - if (window.distill.runlevel < 1) { - throw new Error("Insufficient Runlevel for Distill Template!"); - } - - /* 1. Flag that we're being loaded */ - if ("distill" in window && window.distill.templateIsLoading) { - throw new Error( - "Runlevel 1: Distill Template is getting loaded more than once, aborting!" - ); - } else { - window.distill.templateIsLoading = true; - console.debug("Runlevel 1: Distill Template has started loading."); - } - - /* 2. Add styles if they weren't added during prerendering */ - makeStyleTag(document); - console.debug("Runlevel 1: Static Distill styles have been added."); - console.debug("Runlevel 1->2."); - window.distill.runlevel += 1; - - /* 3. Register Controller listener functions */ - /* Needs to happen before components to their connected callbacks have a controller to talk to. */ - for (const [functionName, callback] of Object.entries(Controller.listeners)) { - if (typeof callback === "function") { - document.addEventListener(functionName, callback); - } else { - console.error("Runlevel 2: Controller listeners need to be functions!"); - } - } - console.debug("Runlevel 2: We can now listen to controller events."); - console.debug("Runlevel 2->3."); - window.distill.runlevel += 1; - - /* 4. 
Register components */ - const components = [ - Abstract, Appendix, Article, Bibliography, Byline, Cite, CitationList, Code, - Footnote, FootnoteList, FrontMatter$1, HoverBox, Title, DMath, References, TOC, Figure, - Slider, Interstitial - ]; - - const distillComponents = [DistillHeader, DistillAppendix, DistillFooter]; - - if (window.distill.runlevel < 2) { - throw new Error("Insufficient Runlevel for adding custom elements!"); - } - const allComponents = components.concat(distillComponents); - for (const component of allComponents) { - console.debug("Runlevel 2: Registering custom element: " + component.is); - customElements.define(component.is, component); - } - - console.debug( - "Runlevel 3: Distill Template finished registering custom elements." - ); - console.debug("Runlevel 3->4."); - window.distill.runlevel += 1; - - // If template was added after DOMContentLoaded we may have missed that event. - // Controller will check for that case, so trigger the event explicitly: - if (domContentLoaded()) { - Controller.listeners.DOMContentLoaded(); - } - - console.debug("Runlevel 4: Distill Template initialisation complete."); - window.distill.templateIsLoading = false; - window.distill.templateHasLoaded = true; - }; - - window.distill = { runlevel, initialize, templateIsLoading }; - - /* 0. 
Check browser feature support; synchronously polyfill if needed */ - if (Polyfills.browserSupportsAllFeatures()) { - console.debug("Runlevel 0: No need for polyfills."); - console.debug("Runlevel 0->1."); - window.distill.runlevel += 1; - window.distill.initialize(); - } else { - console.debug("Runlevel 0: Distill Template is loading polyfills."); - Polyfills.load(window.distill.initialize); - } - -}))); -//# sourceMappingURL=template.v2.js.map +!function(n){"function"==typeof define&&define.amd?define(n):n()}(function(){"use strict"; +// Copyright 2018 The Distill Template Authors +function n(n,t){n.title=t.title,t.published&&(t.published instanceof Date?n.publishedDate=t.published:t.published.constructor===String&&(n.publishedDate=new Date(t.published))),t.publishedDate&&(t.publishedDate instanceof Date?n.publishedDate=t.publishedDate:t.publishedDate.constructor===String?n.publishedDate=new Date(t.publishedDate):console.error("Don't know what to do with published date: "+t.publishedDate)),n.description=t.description,n.authors=t.authors.map(n=>new Nr(n)),n.katex=t.katex,n.password=t.password,t.doi&&(n.doi=t.doi)} +// Copyright 2018 The Distill Template Authors +function t(n=document){const t=new Set,e=n.querySelectorAll("d-cite");for(const n of e){const e=(n.getAttribute("key")||n.getAttribute("bibtex-key")).split(",").map(n=>n.trim());for(const n of e)t.add(n)}return[...t]}function e(n,t,e,i){if(null==n.author)return"";var r=n.author.split(" and ");let o=r.map(n=>{if(-1!=(n=n.trim()).indexOf(","))var e=n.split(",")[0].trim(),i=n.split(",")[1];else if(-1!=n.indexOf(" "))e=n.split(" ").slice(-1)[0].trim(),i=n.split(" ").slice(0,-1).join(" ");else e=n.trim();var r="";return i!=undefined&&(r=(r=i.trim().split(" ").map(n=>n.trim()[0])).join(".")+"."),t.replace("${F}",i).replace("${L}",e).replace("${I}",r).trim()});if(r.length>1){var a=o.slice(0,r.length-1).join(e);return a+=(i||e)+o[r.length-1]}return o[0]}function i(n){var t=n.journal||n.booktitle||"";if("volume"in 
n){var e=n.issue||n.number;e=e!=undefined?"("+e+")":"",t+=", Vol "+n.volume+e}return"pages"in n&&(t+=", pp. "+n.pages),""!=t&&(t+=". "),"publisher"in n&&"."!=(t+=n.publisher)[t.length-1]&&(t+="."),t}function r(n){if("url"in n){var t=n.url,e=/arxiv\.org\/abs\/([0-9\.]*)/.exec(t);if(null!=e&&(t=`http://arxiv.org/pdf/${e[1]}.pdf`),".pdf"==t.slice(-4))var i="PDF";else if(".html"==t.slice(-5))i="HTML";return` [${i||"link"}]`}return""}function o(n,t){return"doi"in n?`${t?"
',n.githubCompareUpdatesUrl&&(t+=`View all changes to this article since it was first published.`),t+=`\n If you see mistakes or want to suggest changes, please create an issue on GitHub.
\n `);const e=n.journal;return void 0!==e&&"Distill"===e.title&&(t+=`\nDiagrams and text are licensed under Creative Commons Attribution CC-BY 4.0 with the source available on GitHub, unless noted otherwise. The figures that have been reused from other sources don\u2019t fall under this license and can be recognized by a note in their caption: \u201cFigure from \u2026\u201d.
\n `),"undefined"!=typeof n.publishedDate&&(t+=`\nFor attribution in academic contexts, please cite this work as
\n${n.concatenatedAuthors}, "${n.title}", Distill, ${n.publishedYear}.\n
BibTeX citation
\n${v(n)}\n `),t}const Mr=["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],Tr=["Jan.","Feb.","March","April","May","June","July","Aug.","Sept.","Oct.","Nov.","Dec."],_r=n=>n<10?"0"+n:n,Cr=function(n){return`${Mr[n.getDay()].substring(0,3)}, ${_r(n.getDate())} ${Tr[n.getMonth()].substring(0,3)} ${n.getFullYear().toString()} ${n.getUTCHours().toString()}:${n.getUTCMinutes().toString()}:${n.getUTCSeconds().toString()} Z`},Ar=function(n){return Array.from(n).reduce((n,[t,e])=>Object.assign(n,{[t]:e}),{})},Er=function(n){const t=new Map;for(var e in n)n.hasOwnProperty(e)&&t.set(e,n[e]);return t};class Nr{constructor(n){this.name=n.author,this.personalURL=n.authorURL,this.affiliation=n.affiliation,this.affiliationURL=n.affiliationURL,this.affiliations=n.affiliations||[]}get firstName(){const n=this.name.split(" ");return n.slice(0,n.length-1).join(" ")}get lastName(){const n=this.name.split(" ");return n[n.length-1]}}class Lr{constructor(){this.title="unnamed article",this.description="",this.authors=[],this.bibliography=new Map,this.bibliographyParsed=!1,this.citations=[],this.citationsCollected=!1,this.journal={},this.katex={},this.doi=undefined,this.publishedDate=undefined}set url(n){this._url=n}get url(){return this._url?this._url:this.distillPath&&this.journal.url?this.journal.url+"/"+this.distillPath:this.journal.url?this.journal.url:void 0}get githubUrl(){return this.githubPath?"https://github.com/"+this.githubPath:undefined}set previewURL(n){this._previewURL=n}get previewURL(){return this._previewURL?this._previewURL:this.url+"/thumbnail.jpg"}get publishedDateRFC(){return Cr(this.publishedDate)}get updatedDateRFC(){return Cr(this.updatedDate)}get publishedYear(){return this.publishedDate.getFullYear()}get publishedMonth(){return Tr[this.publishedDate.getMonth()]}get publishedDay(){return this.publishedDate.getDate()}get publishedMonthPadded(){return _r(this.publishedDate.getMonth()+1)}get publishedDayPadded(){return 
_r(this.publishedDate.getDate())}get publishedISODateOnly(){return this.publishedDate.toISOString().split("T")[0]}get volume(){const n=this.publishedYear-2015;if(n<1)throw new Error("Invalid publish date detected during computing volume");return n}get issue(){return this.publishedDate.getMonth()+1}get concatenatedAuthors(){return this.authors.length>2?this.authors[0].lastName+", et al.":2===this.authors.length?this.authors[0].lastName+" & "+this.authors[1].lastName:1===this.authors.length?this.authors[0].lastName:void 0}get bibtexAuthors(){return this.authors.map(n=>n.lastName+", "+n.firstName).join(" and ")}get slug(){let n="";return this.authors.length&&(n+=this.authors[0].lastName.toLowerCase(),n+=this.publishedYear,n+=this.title.split(" ")[0].toLowerCase()),n||"Untitled"}get bibliographyEntries(){return new Map(this.citations.map(n=>{return[n,this.bibliography.get(n)]}))}set bibliography(n){n instanceof Map?this._bibliography=n:"object"==typeof n&&(this._bibliography=Er(n))}get bibliography(){return this._bibliography}static fromObject(n){const t=new Lr;return Object.assign(t,n),t}assignToObject(n){Object.assign(n,this),n.bibliography=Ar(this.bibliographyEntries),n.url=this.url,n.doi=this.doi,n.githubUrl=this.githubUrl,n.previewURL=this.previewURL,this.publishedDate&&(n.volume=this.volume,n.issue=this.issue,n.publishedDateRFC=this.publishedDateRFC,n.publishedYear=this.publishedYear,n.publishedMonth=this.publishedMonth,n.publishedDay=this.publishedDay,n.publishedMonthPadded=this.publishedMonthPadded,n.publishedDayPadded=this.publishedDayPadded),this.updatedDate&&(n.updatedDateRFC=this.updatedDateRFC),n.concatenatedAuthors=this.concatenatedAuthors,n.bibtexAuthors=this.bibtexAuthors,n.slug=this.slug}} +// Copyright 2018 The Distill Template Authors +const Dr=n=>(class extends n{constructor(){super();const n={childList:!0,characterData:!0,subtree:!0},t=new 
MutationObserver(()=>{t.disconnect(),this.renderIfPossible(),t.observe(this,n)});t.observe(this,n)}connectedCallback(){super.connectedCallback(),this.renderIfPossible()}renderIfPossible(){this.textContent&&this.root&&this.renderContent()}renderContent(){console.error(`Your class ${this.constructor.name} must provide a custom renderContent() method!`)}}),Or=(n,t,e=!0)=>i=>{const r=document.createElement("template");return r.innerHTML=t,e&&"ShadyCSS"in window&&ShadyCSS.prepareTemplate(r,n),class extends i{static get is(){return n}constructor(){super(),this.clone=document.importNode(r.content,!0),e&&(this.attachShadow({mode:"open"}),this.shadowRoot.appendChild(this.clone))}connectedCallback(){this.hasAttribute("distill-prerendered")||(e?"ShadyCSS"in window&&ShadyCSS.styleElement(this):this.insertBefore(this.clone,this.firstChild))}get root(){return e?this.shadowRoot:this}$(n){return this.root.querySelector(n)}$$(n){return this.root.querySelectorAll(n)}}}; +// Copyright 2018 The Distill Template Authors +var Ir='/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nspan.katex-display {\n text-align: left;\n padding: 8px 0 8px 0;\n margin: 0.5em 0 0.5em 1em;\n}\n\nspan.katex {\n -webkit-font-smoothing: antialiased;\n color: rgba(0, 0, 0, 0.8);\n font-size: 1.18em;\n}\n'; +// Copyright 2018 The Distill Template Authors +const Fr=function(n,t,e){let i=e,r=0;const o=n.length;for(;i
tag. We found the following text: "+t);const e=document.createElement("span");e.innerHTML=n.nodeValue,n.parentNode.insertBefore(e,n),n.parentNode.removeChild(n)}}}}).observe(this,{childList:!0})}}var ro="undefined"!=typeof globalThis?globalThis:"undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},oo=m(function(n,t){!function(n){function t(){this.months=["jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec"],this.notKey=[",","{","}"," ","="],this.pos=0,this.input="",this.entries=new Array,this.currentEntry="",this.setInput=function(n){this.input=n},this.getEntries=function(){return this.entries},this.isWhitespace=function(n){return" "==n||"\r"==n||"\t"==n||"\n"==n},this.match=function(n,t){if(t!=undefined&&null!=t||(t=!0),this.skipWhitespace(t),this.input.substring(this.pos,this.pos+n.length)!=n)throw"Token mismatch, expected "+n+", found "+this.input.substring(this.pos);this.pos+=n.length,this.skipWhitespace(t)},this.tryMatch=function(n,t){return t!=undefined&&null!=t||(t=!0),this.skipWhitespace(t),this.input.substring(this.pos,this.pos+n.length)==n},this.matchAt=function(){for(;this.input.length>this.pos&&"@"!=this.input[this.pos];)this.pos++;return"@"==this.input[this.pos]},this.skipWhitespace=function(n){for(;this.isWhitespace(this.input[this.pos]);)this.pos++;if("%"==this.input[this.pos]&&1==n){for(;"\n"!=this.input[this.pos];)this.pos++;this.skipWhitespace(n)}},this.value_braces=function(){var n=0;this.match("{",!1);for(var t=this.pos,e=!1;;){if(!e)if("}"==this.input[this.pos]){if(!(n>0)){var i=this.pos;return this.match("}",!1),this.input.substring(t,i)}n--}else if("{"==this.input[this.pos])n++;else if(this.pos>=this.input.length-1)throw"Unterminated value";e="\\"==this.input[this.pos]&&0==e,this.pos++}},this.value_comment=function(){for(var 
n="",t=0;!this.tryMatch("}",!1)||0!=t;){if(n+=this.input[this.pos],"{"==this.input[this.pos]&&t++,"}"==this.input[this.pos]&&t--,this.pos>=this.input.length-1)throw"Unterminated value:"+this.input.substring(start);this.pos++}return n},this.value_quotes=function(){this.match('"',!1);for(var n=this.pos,t=!1;;){if(!t){if('"'==this.input[this.pos]){var e=this.pos;return this.match('"',!1),this.input.substring(n,e)}if(this.pos>=this.input.length-1)throw"Unterminated value:"+this.input.substring(n)}t="\\"==this.input[this.pos]&&0==t,this.pos++}},this.single_value=function(){var n=this.pos;if(this.tryMatch("{"))return this.value_braces();if(this.tryMatch('"'))return this.value_quotes();var t=this.key();if(t.match("^[0-9]+$"))return t;if(this.months.indexOf(t.toLowerCase())>=0)return t.toLowerCase();throw"Value expected:"+this.input.substring(n)+" for key: "+t},this.value=function(){var n=[];for(n.push(this.single_value());this.tryMatch("#");)this.match("#"),n.push(this.single_value());return n.join("")},this.key=function(){for(var n=this.pos;;){if(this.pos>=this.input.length)throw"Runaway key";if(this.notKey.indexOf(this.input[this.pos])>=0)return this.input.substring(n,this.pos);this.pos++}},this.key_equals_value=function(){var n=this.key();if(this.tryMatch("="))return this.match("="),[n,this.value()];throw"... 
= value expected, equals sign missing:"+this.input.substring(this.pos)},this.key_value_list=function(){var n=this.key_equals_value();for(this.currentEntry.entryTags={},this.currentEntry.entryTags[n[0]]=n[1];this.tryMatch(",")&&(this.match(","),!this.tryMatch("}"));)n=this.key_equals_value(),this.currentEntry.entryTags[n[0]]=n[1]},this.entry_body=function(n){this.currentEntry={},this.currentEntry.citationKey=this.key(),this.currentEntry.entryType=n.substring(1),this.match(","),this.key_value_list(),this.entries.push(this.currentEntry)},this.directive=function(){return this.match("@"),"@"+this.key()},this.preamble=function(){this.currentEntry={},this.currentEntry.entryType="PREAMBLE",this.currentEntry.entry=this.value_comment(),this.entries.push(this.currentEntry)},this.comment=function(){this.currentEntry={},this.currentEntry.entryType="COMMENT",this.currentEntry.entry=this.value_comment(),this.entries.push(this.currentEntry)},this.entry=function(n){this.entry_body(n)},this.bibtex=function(){for(;this.matchAt();){var n=this.directive();this.match("{"),"@STRING"==n?this.string():"@PREAMBLE"==n?this.preamble():"@COMMENT"==n?this.comment():this.entry(n),this.match("}")}}}n.toJSON=function(n){var e=new t;return e.setInput(n),e.bibtex(),e.entries},n.toBibtex=function(n){var t="";for(var e in n){if(t+="@"+n[e].entryType,t+="{",n[e].citationKey&&(t+=n[e].citationKey+", "),n[e].entry&&(t+=n[e].entry),n[e].entryTags){var i="";for(var r in n[e].entryTags)0!=i.length&&(i+=", "),i+=r+"= {"+n[e].entryTags[r]+"}";t+=i}t+="}\n\n"}return t}}(t)});class ao extends HTMLElement{static get is(){return"d-bibliography"}constructor(){super();const n={childList:!0,characterData:!0,subtree:!0};new MutationObserver(n=>{for(const t of n)"SCRIPT"!==t.target.nodeName&&"characterData"!==t.type||this.parseIfPossible()}).observe(this,n)}connectedCallback(){requestAnimationFrame(()=>{this.parseIfPossible()})}parseIfPossible(){const n=this.querySelector("script");if(n)if("text/bibtex"==n.type){const 
t=n.textContent;if(this.bibtex!==t){this.bibtex=t;const n=y(this.bibtex);this.notify(n)}}else if("text/json"==n.type){const t=new Map(JSON.parse(n.textContent));this.notify(t)}else console.warn("Unsupported bibliography script tag type: "+n.type)}notify(n){const t=new CustomEvent("onBibliographyChanged",{detail:n,bubbles:!0});this.dispatchEvent(t)}static get observedAttributes(){return["src"]}receivedBibtex(n){const t=y(n.target.response);this.notify(t)}attributeChangedCallback(n,t,e){var i=new XMLHttpRequest;i.onload=(n=>this.receivedBibtex(n)),i.onerror=(()=>console.warn(`Could not load Bibtex! (tried ${e})`)),i.responseType="text",i.open("GET",e,!0),i.send()}}class so extends HTMLElement{static get is(){return"d-byline"}set frontMatter(n){this.innerHTML=w(n)}}
+// Copyright 2018 The Distill Template Authors
+const lo=Or("d-cite",'\n\n\n
\n\n`);class go extends(Dr(fo(HTMLElement))){renderContent(){if(this.languageName=this.getAttribute("language"),!this.languageName)return void console.warn('You need to provide a language attribute to your