diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 1398821a..c7e7a817 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -1,16 +1,5 @@
 name: CI
 
-on: [push]
-jobs:
-  build:
-    name: Push Sphinx Pages
-    runs-on: ubuntu-latest
-    steps:
-    - uses: seanzhengw/sphinx-pages@master
-      with:
-        github_token: ${{ secrets.GITHUB_TOKEN }}
-        create_readme: true
-
 on:
   pull_request:
     branches:
diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml
new file mode 100644
index 00000000..09e75009
--- /dev/null
+++ b/.github/workflows/sphinx.yml
@@ -0,0 +1,42 @@
+# Build the Sphinx documentation and publish it to the gh-pages branch.
+name: "Build Doc"
+on:
+- push
+
+jobs:
+  docs:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python 3.7
+      uses: actions/setup-python@v2
+      with:
+        python-version: "3.7"
+
+    # This job defines no build matrix, so no `matrix.*` values are passed to
+    # setup-miniconda; the environment is created from the environment file.
+    - uses: conda-incubator/setup-miniconda@v2
+      with:
+        activate-environment: test
+        channel-priority: true
+        environment-file: devtools/conda-envs/espaloma.yaml
+        auto-activate-base: false
+        use-mamba: true
+
+    # Login shells (`bash -l`) are used so the conda environment is activated.
+    - name: Install package
+      shell: bash -l {0}
+      run: |
+        python -m pip install --no-deps .
+    - name: Compile
+      shell: bash -l {0}
+      run: |
+        python -m pip install sphinx sphinx-rtd-theme numpydoc
+        cd docs && make html
+    - name: Deploy
+      # Only publish from the default branch; other pushes just build the docs.
+      if: github.ref == 'refs/heads/master'
+      uses: peaceiris/actions-gh-pages@v3
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        publish_dir: docs/_build/html
diff --git a/README.md b/README.md
index 85b9b1e8..e77ce0af 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,25 @@
-espaloma
+espaloma: **E**xtensible **S**urrogate **P**otenti**al** **O**ptimized by **M**essage-passing **A**lgorithms
 ==============================
 [//]: # (Badges)
 [![CI](https://github.com/choderalab/espaloma/actions/workflows/CI.yaml/badge.svg?branch=master)](https://github.com/choderalab/espaloma/actions/workflows/CI.yaml)
 [![Total alerts](https://img.shields.io/lgtm/alerts/g/choderalab/espaloma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/choderalab/espaloma/alerts/)
 [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/choderalab/espaloma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/choderalab/espaloma/context:python)
+[![docs stable](https://img.shields.io/badge/docs-stable-5077AB.svg?logo=read%20the%20docs)](http://www.espaloma.wangyq.net/)
 
-Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithms
 
-Rather than:
+Source code for [Wang Y, Fass J, and Chodera JD "End-to-End Differentiable Construction of Molecular Mechanics Force Fields."](https://arxiv.org/abs/2010.01196)
 
-molecule ---(atom typing schemes)---> atom-types ---(atom typing schemes)---> bond-, angle-, torsion-types ---(table lookup)---> force field parameters
+![abstract](docs/_static/espaloma_abstract_v2-2.png)
 
-we want to have
 
-molecule ---(graph nets)---> atom-embedding ---(pooling)---> hypernode-embedding ---(feedforward neural networks)---> force field parameters
 
+# Paper Abstract
+Molecular mechanics (MM) potentials have long been a workhorse of computational chemistry.
+Leveraging accuracy and speed, these functional forms find use in a wide variety of applications in biomolecular modeling and drug discovery, from rapid virtual screening to detailed free energy calculations.
+Traditionally, MM potentials have relied on human-curated, inflexible, and poorly extensible discrete chemical perception rules (_atom types_) for applying parameters to small molecules or biopolymers, making it difficult to optimize both types and parameters to fit quantum chemical or physical property data.
+Here, we propose an alternative approach that uses _graph neural networks_ to perceive chemical environments, producing continuous atom embeddings from which valence and nonbonded parameters can be predicted using invariance-preserving layers.
+Since all stages are built from smooth neural functions, the entire process---spanning chemical perception to parameter assignment---is modular and end-to-end differentiable with respect to model parameters, allowing new force fields to be easily constructed, extended, and applied to arbitrary molecules.
+We show that this approach is not only sufficiently expressive to reproduce legacy atom types, but that it can learn and extend existing molecular mechanics force fields, construct entirely new force fields applicable to both biopolymers and small molecules from quantum chemical calculations, and even learn to accurately predict free energies from experimental observables.
 
 # Manifest
 
@@ -48,6 +53,6 @@ This software is licensed under [MIT license](https://opensource.org/licenses/MI
 
 Copyright (c) 2020, Chodera Lab at Memorial Sloan Kettering Cancer Center and Authors:
 Authors:
-- Yuanqing Wang
+- [Yuanqing Wang](http://www.wangyq.net)
 - Josh Fass
 - John D. Chodera
diff --git a/devtools/conda-envs/espaloma.yaml b/devtools/conda-envs/espaloma.yaml
index 674400cd..68e6fee5 100644
--- a/devtools/conda-envs/espaloma.yaml
+++ b/devtools/conda-envs/espaloma.yaml
@@ -4,6 +4,7 @@ channels:
   - dglteam
   - openeye
   - defaults
+  - anaconda
 dependencies:
   # Base dependencies
   - python
@@ -29,3 +30,5 @@ dependencies:
   - nose-timer
   - coverage
   - qcportal
+  - sphinx
+  - sphinx_rtd_theme
diff --git a/docs/_static/espaloma_abstract_v2-2.png b/docs/_static/espaloma_abstract_v2-2.png
new file mode 100644
index 00000000..d71eb929
Binary files /dev/null and b/docs/_static/espaloma_abstract_v2-2.png differ
diff --git a/docs/conf.py b/docs/conf.py
index 1bb69371..dd134917 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,7 +16,7 @@
 import os
 import sys
 
-sys.path.insert(0, os.path.abspath('../espaloma'))
+sys.path.insert(0, os.path.abspath('..'))
 
 import espaloma
 from espaloma import mm, nn, data, graphs
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index 707a0b64..568bbed8 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -1,4 +1,76 @@
 Getting Started
 ===============
 
-This page details how to get started with espaloma. 
+.. image:: _static/espaloma_abstract_v2-2.png
+
+Paper Abstract
+--------------
+Molecular mechanics (MM) potentials have long been a workhorse of computational chemistry.
+Leveraging accuracy and speed, these functional forms find use in a wide variety of applications in biomolecular modeling and drug discovery, from rapid virtual screening to detailed free energy calculations.
+Traditionally, MM potentials have relied on human-curated, inflexible, and poorly extensible discrete chemical perception rules (*atom types*) for applying parameters to small molecules or biopolymers, making it difficult to optimize both types and parameters to fit quantum chemical or physical property data.
+Here, we propose an alternative approach that uses *graph neural networks* to perceive chemical environments, producing continuous atom embeddings from which valence and nonbonded parameters can be predicted using invariance-preserving layers.
+Since all stages are built from smooth neural functions, the entire process---spanning chemical perception to parameter assignment---is modular and end-to-end differentiable with respect to model parameters, allowing new force fields to be easily constructed, extended, and applied to arbitrary molecules.
+We show that this approach is not only sufficiently expressive to reproduce legacy atom types, but that it can learn and extend existing molecular mechanics force fields, construct entirely new force fields applicable to both biopolymers and small molecules from quantum chemical calculations, and even learn to accurately predict free energies from experimental observables.
+
+Minimal Example
+---------------
+::
+
+    import torch, dgl, espaloma as esp
+
+    # retrieve QM dataset used to train OpenFF 1.0.0 ("parsley") small molecule force field
+    dataset = esp.data.dataset.GraphDataset.load("parsley").view(batch_size=128)
+
+    # define Espaloma stage I: graph -> atom latent representation
+    representation = esp.nn.Sequential(
+        layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"), # use SAGEConv implementation in DGL
+        config=[128, "relu", 128, "relu", 128, "relu"], # 3 layers, 128 units, ReLU activation
+    )
+
+    # define Espaloma stage II and III: 
+    # atom latent representation -> bond, angle, and torsion representation and parameters
+    readout = esp.nn.readout.janossy.JanossyPooling(
+        in_features=128,
+        config=[128, "relu", 128, "relu", 128, "relu"],
+        out_features={              # define modular MM parameters Espaloma will assign
+            1: {"e": 1, "s": 1},
+            2: {"coefficients": 2}, # bond linear combination
+            3: {"coefficients": 3}, # angle linear combination
+            4: {"k": 6}, # torsion barrier heights (can be positive or negative)
+        },
+    )
+
+    # compose all three Espaloma stages into an end-to-end model
+    espaloma_model = torch.nn.Sequential(
+                     representation, 
+                     readout,
+                     esp.mm.geometry.GeometryInGraph(),
+                     esp.mm.energy.EnergyInGraph(),
+                     esp.nn.readout.charge_equilibrium.ChargeEquilibrium(),
+    )
+
+    # define training metric
+    metrics = [
+        esp.metrics.GraphMetric(
+                base_metric=torch.nn.MSELoss(), # use mean-squared error loss
+                between=['u', "u_ref"],         # between predicted and QM energies
+                level="g",
+        ),
+        esp.metrics.GraphMetric(
+                base_metric=torch.nn.MSELoss(), # use mean-squared error loss
+                between=['q', "q_hat"],         # between predicted and reference charges
+                level="n1",
+        )
+    ]
+
+    # fit Espaloma model to training data
+    results = esp.Train(
+        ds_tr=dataset, net=espaloma_model, metrics=metrics,
+        device=torch.device('cuda:0'), n_epochs=5000,
+        optimizer=lambda net: torch.optim.Adam(net.parameters(), 1e-3), # use Adam optimizer
+    ).run()
+
+
+
+
+ 
diff --git a/espaloma/mm/functional.py b/espaloma/mm/functional.py
index 56b76d5d..82c7b9f7 100644
--- a/espaloma/mm/functional.py
+++ b/espaloma/mm/functional.py
@@ -161,12 +161,12 @@ def periodic(
 
     k = k[:, None, :].repeat(1, x.shape[1], 1)
 
-    energy = (k * (1.0 + cos_n_theta_minus_phases)).sum(dim=-1)
+    # energy = (k * (1.0 + cos_n_theta_minus_phases)).sum(dim=-1)
 
-    # energy = (
-    #     torch.nn.functional.relu(k) * (cos_n_theta_minus_phases + 1.0)
-    #    -torch.nn.functional.relu(0.0-k) * (cos_n_theta_minus_phases - 1.0)
-    # ).sum(dim=-1)
+    energy = (
+        torch.nn.functional.relu(k) * (cos_n_theta_minus_phases + 1.0)
+       -torch.nn.functional.relu(0.0-k) * (cos_n_theta_minus_phases - 1.0)
+    ).sum(dim=-1)
 
 
     return energy
diff --git a/espaloma/mm/tests/test_openmm_consistency.py b/espaloma/mm/tests/test_openmm_consistency.py
index aef0c238..df5e4302 100644
--- a/espaloma/mm/tests/test_openmm_consistency.py
+++ b/espaloma/mm/tests/test_openmm_consistency.py
@@ -48,7 +48,7 @@ def _create_torsion_sim(
 # TODO: mark this properly: want to test periodicities 1..6, +ve, -ve k
 # @pytest.mark.parametrize(periodicity=[1,2,3,4,5,6], k=[-10 * omm_energy_unit, +10 * omm_energy_unit])
 def test_periodic_torsion(
-    periodicity=4, k=-10 * omm_energy_unit, n_samples=100
+    periodicity=4, k=10 * omm_energy_unit, n_samples=100
 ):
     """ Using simulated torsion scan, test if espaloma torsion energies and
     OpenMM torsion energies agree.
diff --git a/espaloma/nn/readout/janossy.py b/espaloma/nn/readout/janossy.py
index eb69f924..d482317e 100644
--- a/espaloma/nn/readout/janossy.py
+++ b/espaloma/nn/readout/janossy.py
@@ -423,3 +423,32 @@ def forward(self, g):
             )
 
         return g
+
+
+class ExpCoefficients(torch.nn.Module):
+    """Exponentiate `log_coefficients` into `coefficients` on `n2`/`n3` nodes."""
+    def forward(self, g):
+        for term in ('n2', 'n3'):  # updates `g` in place, then returns it
+            g.nodes[term].data['coefficients'] = g.nodes[term].data['log_coefficients'].exp()
+        return g
+
+class LinearMixtureToOriginal(torch.nn.Module):
+    """Replace `coefficients` on `n2`/`n3` nodes with `k` and `eq` entries."""
+
+    def forward(self, g):
+        import math
+        # per node type: the two trailing arguments passed to the conversion
+        for term, first, second in (('n2', 1.5, 6.0), ('n3', 0.0, math.pi)):
+            data = g.nodes[term].data
+            coefficients = data['coefficients']
+            data['k'], data['eq'] = esp.mm.functional.linear_mixture_to_original(
+                coefficients[:, 0][:, None],
+                coefficients[:, 1][:, None],
+                first, second,
+            )
+        # the mixture coefficients are removed once `k` and `eq` are written
+        g.nodes['n3'].data.pop('coefficients')
+        g.nodes['n2'].data.pop('coefficients')
+        return g
+
+