diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f7a9ad67..fa2cc64d 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -12,7 +12,9 @@ Because GitHub's [graph of contributors](http://github.com/secondmind-labs/GPflu [Felix Leibfried](https://github.com/fleibfried), [John A. McLeod](https://github.com/johnamcleod), [Hugh Salimbeni](https://github.com/hughsalimbeni), -[Marcin B. Tomczak](https://github.com/marctom) +[Marcin B. Tomczak](https://github.com/marctom), +[Sebastian Popescu](https://github.com/SebastianPopescu), +[Alessandro Vullo](https://github.com/avullo), Feel free to add yourself when you first contribute to GPflux's code, tests, or documentation! \ No newline at end of file diff --git a/docs/notebooks/deep_cde.ipynb b/docs/notebooks/deep_cde.ipynb index eb069dbb..ff48c7a2 100644 --- a/docs/notebooks/deep_cde.ipynb +++ b/docs/notebooks/deep_cde.ipynb @@ -2,18 +2,22 @@ "cells": [ { "cell_type": "markdown", + "id": "61537fc9", + "metadata": {}, "source": [ "# Deep Gaussian processes with Latent Variables\n", "\n", "In this notebook, we explore the use of Deep Gaussian processes and Latent Variables to model a dataset with heteroscedastic noise. The model can be seen as a deep GP version of or as doing variational inference in models from . We start by fitting a single layer GP model to show that it doesn't result in a satisfactory fit for the noise.\n", "\n", "This notebook is inspired by [prof. Neil Lawrence's Deep Gaussian process talk](https://inverseprobability.com/talks/notes/deep-gps.html), which we highly recommend watching." 
- ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "id": "1d1e7e03", + "metadata": {}, + "outputs": [], "source": [ "import tensorflow as tf\n", "import gpflow\n", @@ -24,76 +28,73 @@ "\n", "import tensorflow_probability as tfp\n", "from sklearn.neighbors import KernelDensity\n" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "id": "5b0bccd7", + "metadata": { + "lines_to_next_cell": 2 + }, "source": [ "## Load data\n", "\n", "The data comes from a motorcycle accident simulation [1] and shows some interesting behaviour. In particular the heteroscedastic nature of the noise." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, + "id": "08bbc183", + "metadata": {}, + "outputs": [], "source": [ "def motorcycle_data():\n", - " \"\"\" Return inputs and outputs for the motorcycle dataset. We normalise the outputs. \"\"\"\n", + " \"\"\"Return inputs and outputs for the motorcycle dataset. We normalise the outputs.\"\"\"\n", " import pandas as pd\n", + "\n", " df = pd.read_csv(\"./data/motor.csv\", index_col=0)\n", " X, Y = df[\"times\"].values.reshape(-1, 1), df[\"accel\"].values.reshape(-1, 1)\n", " Y = (Y - Y.mean()) / Y.std()\n", " X /= X.max()\n", " return X, Y" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, + "id": "3d0e2bdf", + "metadata": {}, + "outputs": [], "source": [ "X, Y = motorcycle_data()\n", "num_data, d_xim = X.shape\n", "\n", "X_MARGIN, Y_MARGIN = 0.1, 0.5\n", "fig, ax = plt.subplots()\n", - "ax.scatter(X, Y, marker='x', color='k');\n", - "ax.set_ylim(Y.min() - Y_MARGIN, Y.max() + Y_MARGIN);\n", - "ax.set_xlim(X.min() - X_MARGIN, X.max() + X_MARGIN);" - ], - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "metadata": {} + "ax.scatter(X, Y, marker=\"x\", color=\"k\")\n", + "ax.set_ylim(Y.min() - Y_MARGIN, Y.max() + Y_MARGIN)\n", + "ax.set_xlim(X.min() - X_MARGIN, X.max() + X_MARGIN)" + ] }, { "cell_type": "markdown", + "id": "7c40b3e1", + "metadata": {}, "source": [ "## Standard single layer Sparse Variational GP\n", "\n", "We first show that a single layer SVGP performs quite poorly on this dataset. In the following code block we define the kernel, inducing variable, GP layer and likelihood of the shallow GP:" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, + "id": "54e4e5a8", + "metadata": { + "lines_to_end_of_cell_marker": 2 + }, + "outputs": [], "source": [ "NUM_INDUCING = 20\n", "\n", @@ -101,25 +102,24 @@ "inducing_variable = gpflow.inducing_variables.InducingPoints(\n", " np.linspace(X.min(), X.max(), NUM_INDUCING).reshape(-1, 1)\n", ")\n", - "gp_layer = gpflux.layers.GPLayer(\n", - " kernel, inducing_variable, num_data=num_data, num_latent_gps=1\n", - ")\n", - "likelihood_layer = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian(0.1))\n", - "\n" - ], - "outputs": [], - "metadata": {} + "gp_layer = gpflux.layers.GPLayer(kernel, inducing_variable, num_data=num_data, num_latent_gps=1)\n", + "likelihood_layer = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian(0.1))" + ] }, { "cell_type": "markdown", + "id": "9914574b", + "metadata": {}, "source": [ "We can now encapsulate `gp_layer` in a GPflux DeepGP model:" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, + "id": "e2471c80", + "metadata": {}, + "outputs": [], "source": [ "\n", "single_layer_dgp = gpflux.models.DeepGP([gp_layer], likelihood_layer)\n", @@ -129,47 +129,16 @@ "history = model.fit({\"inputs\": X, \"targets\": Y}, epochs=int(1e3), verbose=0)\n", "fig, ax = plt.subplots()\n", 
"ax.plot(history.history[\"loss\"])\n", - "ax.set_xlabel('Epoch')\n", - "ax.set_ylabel('Loss')" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "WARNING:tensorflow:From /home/vincent/anaconda3/envs/gpflow2/lib/python3.7/site-packages/tensorflow/python/ops/linalg/linear_operator_diag.py:175: calling LinearOperator.__init__ (from tensorflow.python.ops.linalg.linear_operator) with graph_parents is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Do not pass `graph_parents`. They will no longer be used.\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Text(0, 0.5, 'Loss')" - ] - }, - "metadata": {}, - "execution_count": 5 - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAe10lEQVR4nO3deZwcZ33n8c+vqo+5RzOakSxLNpJvywfgDD6w8cvG5gyBHBAwEBzjxJuEBXMsV3Zfyx7Jbk4IBJbFQLgPB+OA8SYGbAzGBBskI5+ykeVLt0bX3NPnb/+omtFoRrbH0vTUTNX3/Xr1S93VrX5+NSV95+mnnn7K3B0REcmOIOkCRERkfin4RUQyRsEvIpIxCn4RkYxR8IuIZEwu6QJmo6enx1evXp10GSIii8r69ev3uHvv9O2LIvhXr17NunXrki5DRGRRMbMnD7ddQz0iIhmj4BcRyRgFv4hIxij4RUQyRsEvIpIxCn4RkYxR8IuIZEyqg//Ge7by1bsOO41VRCSzGhb8ZvZPZrbbzB6Ysq3bzH5oZpviP7sa1T7A9+7dzvW/3NLIJkREFp1G9vi/CLxy2rYPAbe5+8nAbfHjhgmDgFpdF5oREZmqYcHv7ncA+6Ztfh3wpfj+l4DfblT7ALnAFPwiItPM9xj/cnffEd/fCSx/uhea2TVmts7M1vX39x9RY2FoVOv1I/q7IiJpldjJXY8u9vu03XF3v87d+9y9r7d3xuJys6Iev4jITPMd/LvMbAVA/OfuRjYWBkZVwS8icoj5Dv6bgCvj+1cC321kY+rxi4jM1MjpnN8Afg6camZbzexq4K+Al5nZJuDy+HHDhEGgHr+IyDQNuxCLu1/xNE9d1qg2p1OPX0RkplR/czcMjGpNs3pERKZKdfCrxy8iMlOqgz+ax6/gFxGZKtXBrx6/iMhMqQ7+iVk90XfFREQE0h78ZgCo0y8iclCqgz8XRsGv9XpERA5KdfCHQRT8GucXETko1cGfU/CLiMyQ6uBXj19EZKZUB/9Ej19z+UVEDkp18IdBtHvq8YuIHJTq4FePX0RkplQH/+QYf03BLyIyIdXBr3n8IiIzpTr4NatHRGSmVAe/xvhFRGZKdfBrVo+IyEypDn71+EVEZkp18B8c49fJXRGRCakO/skev6ZziohMSnXwB5rVIyIyQ6qDX2P8IiIzpTr4J8f4delFEZFJqQ7+3MR0To3xi4hMSnXwhxrqERGZIdXBP7FWj07uiogclOrgP9jj1zx+EZEJqQ5+XXNXRGSmVAe/xvhFRGZKdfDntEibiMgMqQ5+9fhFRGZKdfBPjvHXdHJXRGRCIsFvZu8xswfN7AEz+4aZNTWinTBUj19EZLp5D34zWwm8C+hz9zOBEHhTI9rSrB4RkZmSGurJAc1mlgNagO2NaERj/CIiM8178Lv7NuDvgKeAHcCAu/9g+uvM7BozW2dm6/r7+4+ordDU4xcRmS6JoZ4u4HXAGuBYoNXM3jr9de5+nbv3uXtfb2/vEbWlHr+IyExJDPVcDjzu7v3uXgFuBF7ciIbMjDAwXXpRRGSKJIL/KeB8M2sxMwMuAzY2qrEo+Bv17iIii08SY/x3AzcA9wD3xzVc16j2curxi4gcIpdEo+7+EeAj89FWGJjG+EVEpkj1N3dhosev4BcRmZD64A+DQD1+EZEpUh/8ucB0zV0RkSlSH/wa4xcROVTqgz8XalaPiMhUqQ9+9fhFRA6V+uDXrB4RkUOlPvg1q0dE5FCpD/5cYFS1ZoOIyKTUB7/G+EVEDpX64C+EARX1+EVEJqU++HOhUdUXuEREJqU++PNhQEVDPSIikzIQ/EalqqEeEZEJqQ/+XBBQ1Td3RUQmpT7487mAisb4RUQmpT/4A9OsHhGRKdIf/GGgWT0iIlOkPvhzoXr8IiJTpT748/oCl4jIITIQ/KaTuyIiU6Q++HOhpnOKiEyV+
uCPhnocd/X6RUQgC8EfGIBW6BQRiaU++HNhtIua0ikiEkl98OfDqMdf1sweEREgE8E/0eNX8IuIQJaCX2P8IiJABoI/NzHUo6WZRUSADAT/xBi/evwiIpEMBH+0i1q2QUQkkvrgzwUKfhGRqVIf/IVcPNSjefwiIkBCwW9mS8zsBjN72Mw2mtkFjWpLPX4RkUPlEmr348At7v56MysALY1q6OAYv3r8IiKQQPCbWSdwMfCHAO5eBsqNam9iVo96/CIikSSGetYA/cAXzOxXZvY5M2ttVGOTa/VoaWYRESCZ4M8B5wCfdvcXAiPAh6a/yMyuMbN1Zrauv7//iBubXKunqqEeERFIJvi3Alvd/e748Q1EvwgO4e7XuXufu/f19vYecWPFXAhokTYRkQnzHvzuvhPYYmanxpsuAx5qVHvFXLSLpUqtUU2IiCwqszq5a2YnEvXSS2Z2CXA28GV3P3CE7b4T+Fo8o+cx4KojfJ9nVczHwa+1ekREgNn3+L8N1MzsJOA64Djg60faqLtviIdxznb333b3/Uf6Xs9mYqhHwS8iEplt8NfdvQr8DvCP7v5+YEXjypo7E0M94xrqEREBZh/8FTO7ArgSuDnelm9MSXNrcoxfPX4REWD2wX8VcAHwl+7+uJmtAb7SuLLmjplRyAWUqurxi4jALE/uuvtDwLsAzKwLaHf3v25kYXOpmAsoVdTjFxGBWfb4zezHZtZhZt3APcBnzeyjjS1t7hRzoYZ6RERisx3q6XT3QeB3iaZxngdc3riy5lZTXkM9IiITZhv8OTNbAfw+B0/uLhrFXKAev4hIbLbB/z+A7wOb3f2XZnYCsKlxZc2tYi7UGL+ISGy2J3e/BXxryuPHgN9rVFFzraihHhGRSbM9ubvKzP7FzHbHt2+b2apGFzdXNNQjInLQbId6vgDcBBwb374Xb1sUNKtHROSg2QZ/r7t/wd2r8e2LwJGvlTzPonn8GuoREYHZB/9eM3urmYXx7a3A3kYWNpeKefX4RUQmzDb43040lXMnsAN4PfE1cxcD9fhFRA6aVfC7+5Pu/lp373X3Ze7+2yymWT25gHH1+EVEgKO7Atd756yKBmsphIyWq0mXISKyIBxN8NucVdFgrcUc45U6VV13V0TkqILf56yKBmsrRt9TGylrnF9E5Bm/uWtmQxw+4A1obkhFDTAZ/KUqnc2L4voxIiIN84zB7+7t81VII7XGwT9c0ji/iMjRDPUsGm0KfhGRSZkI/tYpQz0iIlmXieBvU/CLiEzKVPAPjSv4RUQyEfytxRBQj19EBDIT/JrHLyIyIRPBX8wF5EPTUI+ICBkJfjOjs7nAwFg56VJERBKXieAHWNpaYN+Igl9EJDPB39WaZ/9IJekyREQSl5ng724tsG9UPX4RkcwEf1dLgf0a6hERyU7wd7cW2D9apl5fNKtJi4g0RGaCv6ulQN1hcFzj/CKSbYkFv5mFZvYrM7t5Ptpb2lYAYM9waT6aExFZsJLs8V8LbJyvxo7paAJg54CCX0SyLZHgN7NVwG8Cn5uvNld0RhcM2z4wNl9NiogsSEn1+P8B+ADwtFc/N7NrzGydma3r7+8/6gaXdxYB2DkwftTvJSKymM178JvZa4Dd7r7+mV7n7te5e5+79/X29h51u8VcSE9bgR3q8YtIxiXR478QeK2ZPQF8E3ipmX11Phpe0dnM9gPq8YtIts178Lv7h919lbuvBt4E/Mjd3zofbR/T2aShHhHJvMzM4wc4trNJJ3dFJPMSDX53/7G7v2a+2luxpJmh8aq+xCUimZapHv8py9sAeHjHUMKViIgkJ1PBf8axnQA8uH0g4UpERJKTqeBf1l6kp63Ag9sHky5FRCQxmQp+M2PtsZ0KfhHJtEwFP8DaFR08unuIcvVpvzQsIpJqmQv+M47toFJzfr1LJ3hFJJsyGfwAD2m4R0QyKnPBv3ppKy2FUDN7RCSzMhf8QWCcvqJDJ3hFJLMyF/wQDfds3DGo6
++KSCZlNvhHyjWe3DeadCkiIvMuo8Gvb/CKSHZlMvhPXt5GPjTu26rgF5HsyWTwF3MhZ63sZN0T+5IuRURk3mUy+AFetKab+7cNMFKqJl2KiMi8ymzwv3ztcio15//dtyPpUkRE5lVmg/+c47tY2lrgrsf3Jl2KiMi8ymzwmxkXn9LLDx/axXillnQ5IiLzJrPBD/CGvlUMjVe55YGdSZciIjJvMh38569ZyvHdLVz/yy1JlyIiMm8yHfxBYLz2+cdy9+N7NbtHRDIj08EPcO6abuoOP3hIwz0ikg2ZD/6LTurhzJUd/M0tj+gkr4hkQuaDPwiMD7ziNHYMjHOz5vSLSAZkPvgBLjyph7UrOvjYD39NtaZr8YpIuin4gTAw3vOyU9h2YIwv//zJpMsREWkoBX/sstOWce7qbj5zx2YGRitJlyMi0jAK/lgQGB945ansGynzjq/fg7uuziUi6aTgn6JvdTcfftXp3PnoHu58dE/S5YiINISCf5o3n3c8K5c085++dS+D4xryEZH0UfBP05QP+cvfOZNdgyXee/29muUjIqmj4D+MS05dxn/5zdO5deMuPn/n40mXIyIypxT8T+Pqi9bwkpN7+NvvP8J9Ww8kXY6IyJyZ9+A3s+PM7HYze8jMHjSza+e7htkwMz55xTn0thf5oy+tY9uBsaRLEhGZE0n0+KvA+9x9LXA+8A4zW5tAHc+qsyXPZ9/Wx0ipyts+fzdP7R1NuiQRkaM278Hv7jvc/Z74/hCwEVg533XM1pkrO/nkm89hc/8IV33xFzy0fTDpkkREjkqiY/xmthp4IXD3YZ67xszWmdm6/v7++S7tEJeetowvv/1cBsaq/O6nf8ZPNyVbj4jI0Ugs+M2sDfg28G53n9GNdvfr3L3P3ft6e3vnv8BpLj6ll3+79iWsXtrK1V9cx60P7Uq6JBGRI5JI8JtZnij0v+buNyZRw5HobS/yzWvO5/QV7fzJV9fz8Vs3UatraYfDqdedD994n2ZEiSxASczqMeDzwEZ3/+h8t3+0lrQU+OofnccFJy7lY7f+mj/56np2DY4nXdaCs+3AGN/4xRau/tK6pEsRkWmS6PFfCPwB8FIz2xDfXp1AHUesvSnPl99+Lu9/xan85JF+zvtft/Hmz95FqaoreE3Ysj+aAaWrmoksPEnM6rnT3c3dz3b3F8S3f53vOo6WmfGOS0/i+++5mLNXdfLvm/fyH76ynmFdtB2AnQPRp6DALOFKRGQ6fXP3KK3paeWm/3gR//U1a/nxI/1c8re3c+M9WxnK+AJvQ+PRL8AwUPCLLDS5pAtIi7dftIZTj2nn73/wCO/953sBeM3ZK/jYG19APsze79eJX3yKfZGFJ3uJ1EAXntTDN6+5gA++8jQAbr5vB6/75M84MFpOuLL5N9HjL2t1U5EFR8E/xwq5gD+95EQ2/eWr+PNXn8aju4d57Sd/xt2P7U26tHk1GAf/0HiVz/xkc8LViMhUCv4GyYcB11x8Il//4/MAeON1d/Hef97A43tGEq5sfkw9x/G//+1hfd9BZAFR8DdY3+pubnn3S7jm4hP43r3becXH7uADN9zL+if3JV1aQw2NV2nKB7QUQgC27tcCdyILhYJ/HrQUcvz5q0/nJ++/lN96/rF8d8N2fu/TP+eDN9zHvpF0jv8Pl6qcc3wXX7n6XAA29w8nXJGITFDwz6NjlzTz97//fO74wKX85lkruH7dFi78qx/xP29+iB0D6Vrvf2i8QntTjhN62gB4rD8bQ1wii4GmcyZgeUcTn3rLOVy7a4j/++PNfPHfn+CffvY4L3peN3/0kjW8bO1ybJF/8WlovEp7U56u1gLdrQX1+EUWEPX4E3TK8nY++sYXcPv7LuG9l5/CY3tGuOYr63n1J+7kO7/atqiXO4iCP+pXnNjbymb1+EUWDAX/AnD80hbeednJ3PnBS/nIb61lvFLj3ddvoO8vbuX937qXB7YNJF3ic1KrO8OlqMcPcPLyd
jZuH9RaRiILhIZ6FpCmfMhVF67hLec9j7se28v167Zw073b+db6rZzY28qrzlzBK844htNWtC/obwNPrFfUEff4X752OV+/+yl+/Eg/rzjjmCRLExEU/AtSIRdw8Sm9XHxKLwNjFW7asI1/e2An/+fHj/LJ2x9laWuBy09fzgUnLuW8E7o5pqNpQZ0TmJjDPzHUc9FJPfS0Fbjxnq0KfpEFQMG/wHU25/mDC1bzBxesZu9wiR89vJvbNu7mlgd3cv26LQA050N624ucc/wSXveClZy0rI2VS5oJElogbaLHPzHUkwsDfvecVXzup49x12N7Of+EpYnUJSIRBf8isrStyBv6juMNfcdRrzsPbB9gw5YD/OqpA9y79QDf2bCd72zYDkSfGk7oaeWFxy9hrFxjZVczKzqbWdZe5KKTe2gpNO7QT6zT01Y82MafXXIit23cxZ997R4++7Y+fuN5XQ1rX0SemYJ/kQoC4+xVSzh71RLedkG0baRU5YFtA2zuH+Gx/mEe2TXEzffuYGjaNQIKYcCpx7RzXHczpyxvZ1VXC4VcwBnHdtDVUqCjKUfuKM4h9A+VAFjaVpjctqSlwOeufBF/+IVfcMV1d/HHF6/hmpecSGdL/ojbEZEjo+BPkdZijvNOWMp5U4ZS3J1a3Xli7yh1d/qHStzx637u3zbAjx7ezb/ev/Ow79XelAOH47pbOK67mY6mPLkwoKetwPKOJp7aN8ruwXEuOHEpv/G8LnrainQ25zGzyeUZVnW1HPKea3pa+e47LuS/3fQgn7p9M9fd8RgvPrGHc9d0c2JvG73tRZa2FuhuK5ALrKGfSkSyzNwX/uJZfX19vm6drt3aCGPlGjsHx9k5MM6uwXH2j5bZP1phYLTMwFiFnYPj7B+psH+0zO64J/908qFRCANGyjWO6Wjirj+/7Glfe//WAb6zYRt3/LqfTbtnfrkrDIyuljzFXEhHc54zju2gkAvoasnT1RJ9KayrtUB3S4GmfEh3a4H2phzFXLCgTnSLJMnM1rt73/Tt6lJlXHMhZE1PK2t6Wp/1te5OqVpn70iZwbHK5Hz9XYPj7Bkus3tonM27R2grhrzj0pOe8b3OWtXJWas6ARgYq7Bl3yj9wyX6h0r88vF9VOtOSyGkVK2za3Ccn27qp1JzBuJ2n05g0bTYnrYiLYUQM6O9KUdPW4FSpU4hFxAERmdznkIYkA+N5R1NbNo1zLKOIt2t0SeagbEKbcUcZtHJ86Z8SLlapzU+b1F3p6MpP3kiu5gLWNKSpykfMlquUanVcY9+gbUUQloKIQdGKyxpyVOu1SmEAbkwYNfgOKVKneFSlbNWdnJgrMxTe0dpLeZY1lEkFwTkQmNwrEJPW5FaPToGzfmQ8UqNYj6I99sIzBgcr9CcD8mHAYGBx7W6Qy4wxqt1irkAd6jWo/cZKdcYK9cIDLpbC/QPl8Cj4bkwMDbtHqK1kKO1mKOQCyiEAftHy5SrdY7pbGLPcImetiLu0Xc4mgvRz+rB7QM05UNOO6adUrWOWdTRaC6EVGrOln2jnLSsjXwYUKrWKMW1lap1dg+W6GrJs6SlwMBYhe7WaNiwXK2zdf8ox3VHnyZHSlW+dvdTXHrqMk5f0c7ekTJP7h2hrZhnxZImQjPy8XEGqNadnQPj9LQV2TU4zuqe1slPxSPlGi2FkO0Hxlje0USpUicIorW2DBiv1mjOh5Mdi0qtzvYDY+TCgO9u2MZTe0e59vKT6W0rkgsDqrU6dY+uSdGSjxYrrLlTdycfBJOTLybar7kzOFalu7XAaLk6+bNuREdGPX5ZVOp1Z2i8yr7RMvtGyuwfKTNSrnJgtMJIucpoqcZwqcqe4RKj5Rrlap1yrc7e4RKlahTGxXzA/pEyB8YqGLAYV4w2g0b81w3s0J9HPjQqtUMbCgOb/OV7uDoKuYBy9eAFeKa/5/T2cuGhrz+cQhhQd6c65Y2m1jEbucAO+fsTtQYG45Vnbn9iPwu5IPrlV
Kk/7UWGCmFAMR8wXKoS2Mwac4FRc6cQBtTqPqOmqT+vfGj867tewsnL22e9n4fWrR6/pEAQGJ0teTpb8rP6lPJMPA6SkVKVXBiQC6Ke9d6RMm3FHGPxkhnjlahHbGaMV2qMlqvU6lCuRc+3FfOUq/XodZXaZA+7KR9QrTv1ulOuOYVcwEipSmsxR7VWp1KrYxjNhZB9I2XCwHB3zIxl7UXGq3VqtTrVerRtIP5F1VIIGRir4EBoRksxZKRUJQwCOppyHBitkA8DzKIQmegxDo5XaC3kGC3XqNXrdLUWGB6vUq07y9qL5AJjz3CZSr1O/1CJVV0tlCo1ivmQ4kTgVeuMxJ9y2ppyjJSqdLcWGR6vMl6t0VoIGSpVyQcBD+8c4vjuFpoLAaEZThSegUXHsbe9yNb9Y9TdqdUci2td2lpgSUue4VKNfSMlth8YZ3lH02RAP7B9gPPXdDNWqdFcyLFt/xhnruxgx8A4gRnVWrRvY+UaHc05KjWf7AAYUMxFP799IyWWdzRRrTu5wCjmAjBj73C0vbWYo153RssTxzyqsVJzmvIh7k4uNMIgYO9wiUIuYElzgcHx6FNpR3OewbEKXS0Fxqu1ySVYdhwYZ3lHkaZ8SBjY5L+9kXKVJc0F9gyX6IinQo9Xa/S0FY/q3/nhKPgls8yMfGgsaTk4+6gpH7KsoynBqkQab+F+719ERBpCwS8ikjEKfhGRjFHwi4hkjIJfRCRjFPwiIhmj4BcRyRgFv4hIxiyKJRvMrB948gj/eg+wZw7LWQy0z9mgfc6Go9nn57l77/SNiyL4j4aZrTvcWhVppn3OBu1zNjRinzXUIyKSMQp+EZGMyULwX5d0AQnQPmeD9jkb5nyfUz/GLyIih8pCj19ERKZQ8IuIZEyqg9/MXmlmj5jZo2b2oaTrmQtmdpyZ3W5mD5nZg2Z2bby928x+aGab4j+74u1mZp+Ifwb3mdk5ye7BkTOz0Mx+ZWY3x4/XmNnd8b5db2aFeHsxfvxo/PzqJOs+Uma2xMxuMLOHzWyjmV2Q9uNsZu+J/10/YGbfMLOmtB1nM/snM9ttZg9M2facj6uZXRm/fpOZXflcakht8JtZCHwKeBWwFrjCzNYmW9WcqALvc/e1wPnAO+L9+hBwm7ufDNwWP4Zo/0+Ob9cAn57/kufMtcDGKY//GviYu58E7AeujrdfDeyPt38sft1i9HHgFnc/DXg+0b6n9jib2UrgXUCfu58JhMCbSN9x/iLwymnbntNxNbNu4CPAecC5wEcmflnMirun8gZcAHx/yuMPAx9Ouq4G7Od3gZcBjwAr4m0rgEfi+58Brpjy+snXLaYbsCr+D/FS4GbAiL7NmJt+vIHvAxfE93Px6yzpfXiO+9sJPD697jQfZ2AlsAXojo/bzcAr0nicgdXAA0d6XIErgM9M2X7I657tltoePwf/EU3YGm9Ljfij7QuBu4Hl7r4jfmonsDy+n5afwz8AHwDq8eOlwAF3r8aPp+7X5D7Hzw/Er19M1gD9wBfi4a3PmVkrKT7O7r4N+DvgKWAH0XFbT7qP84TnelyP6ninOfhTzczagG8D73b3wanPedQFSM08XTN7DbDb3dcnXcs8ygHnAJ929xcCIxz8+A+k8jh3Aa8j+qV3LNDKzCGR1JuP45rm4N8GHDfl8ap426JnZnmi0P+au98Yb95lZivi51cAu+Ptafg5XAi81syeAL5JNNzzcWCJmeXi10zdr8l9jp/vBPbOZ8FzYCuw1d3vjh/fQPSLIM3H+XLgcXfvd/cKcCPRsU/zcZ7wXI/rUR3vNAf/L4GT4xkBBaKTRDclXNNRMzMDPg9sdPePTnnqJmDizP6VRGP/E9vfFs8OOB8YmPKRclFw9w+7+yp3X010HH/k7m8BbgdeH79s+j5P/CxeH79+UfWM3X0nsMXMTo03XQY8RIqPM9EQz/lm1hL/O5/Y59Qe5yme63H9PvByM+uKPym9PN42O0mf5GjwCZRXA78GN
gP/Oel65mifLiL6GHgfsCG+vZpobPM2YBNwK9Adv96IZjdtBu4nmjGR+H4cxf5fAtwc3z8B+AXwKPAtoBhvb4ofPxo/f0LSdR/hvr4AWBcf6+8AXWk/zsB/Bx4GHgC+AhTTdpyBbxCdw6gQfbK7+kiOK/D2eN8fBa56LjVoyQYRkYxJ81CPiIgchoJfRCRjFPwiIhmj4BcRyRgFv4hIxij4RQAzq5nZhim3OVvN1cxWT12JUSRpuWd/iUgmjLn7C5IuQmQ+qMcv8gzM7Akz+xszu9/MfmFmJ8XbV5vZj+I10m8zs+Pj7cvN7F/M7N749uL4rUIz+2y81vwPzKw5sZ2SzFPwi0Sapw31vHHKcwPufhbwSaJVQgH+EfiSu58NfA34RLz9E8BP3P35RGvrPBhvPxn4lLufARwAfq/B+yPytPTNXRHAzIbdve0w258AXuruj8WL4+1096Vmtodo/fRKvH2Hu/eYWT+wyt1LU95jNfBDjy6ygZl9EMi7+180fs9EZlKPX+TZ+dPcfy5KU+7X0Pk1SZCCX+TZvXHKnz+P7/870UqhAG8Bfhrfvw34U5i8RnDnfBUpMlvqdYhEms1sw5THt7j7xJTOLjO7j6jXfkW87Z1EV8d6P9GVsq6Kt18LXGdmVxP17P+UaCVGkQVDY/wizyAe4+9z9z1J1yIyVzTUIyKSMerxi4hkjHr8IiIZo+AXEckYBb+ISMYo+EVEMkbBLyKSMf8fzQ4l5vupbIsAAAAASUVORK5CYII=" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "metadata": {} + "ax.set_xlabel(\"Epoch\")\n", + "ax.set_ylabel(\"Loss\")" + ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, + "id": "7aa743e0", + "metadata": {}, + "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "num_data_test = 200\n", @@ -190,66 +159,47 @@ "ax.set_xlim(X.min() - X_MARGIN, X.max() + X_MARGIN)\n", "ax.plot(X, Y, \"kx\", alpha=0.5)\n", "ax.plot(X_test, mu, \"C1\")\n", - "ax.set_xlabel('time')\n", - "ax.set_ylabel('acc')\n" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Text(0, 0.5, 'acc')" - ] - }, - "metadata": {}, - "execution_count": 6 - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "metadata": {} + "ax.set_xlabel(\"time\")\n", + "ax.set_ylabel(\"acc\")\n" + ] }, { "cell_type": "markdown", + "id": "be5c1a4e", + "metadata": {}, "source": [ - "The errorbars of the single layer model are not good: we observe an overestimation of the error bars on the left and right. " - ], - "metadata": {} + "The errorbars of the single layer model are not good: we observe an overestimation of the error bars on the left and right." + ] }, { "cell_type": "markdown", + "id": "77cb254c", + "metadata": {}, "source": [ "## Deep Gaussian process with latent variables\n", "\n", - "To tackle the problem we suggest a Deep Gaussian process with a latent variable in the first layer. The latent variable will be able to capture the \n", - "heteroscedasticity, while the two-layered deep GP is able to model the sharp transitions. \n", + "To tackle the problem we suggest a Deep Gaussian process with a latent variable in the first layer. The latent variable will be able to capture the\n", + "heteroscedasticity, while the two-layered deep GP is able to model the sharp transitions.\n", "\n", - "Note that a GPflux Deep Gaussian process by itself (i.e. without the latent variable layer) is not able to capture the heteroscedasticity of this dataset. This is a consequence of the noise-less hidden layers and the doubly-stochastic variational inference training procedure, as forumated in . On the contrary, the original deep GP suggested by Damianou and Lawrence , using a different variational approximation for training, can model this dataset without a latent variable, as shown in [this blogpost](https://inverseprobability.com/talks/notes/deep-gps.html). " - ], - "metadata": {} + "Note that a GPflux Deep Gaussian process by itself (i.e. without the latent variable layer) is not able to capture the heteroscedasticity of this dataset. 
This is a consequence of the noise-less hidden layers and the doubly-stochastic variational inference training procedure, as formulated in . On the contrary, the original deep GP suggested by Damianou and Lawrence , using a different variational approximation for training, can model this dataset without a latent variable, as shown in [this blogpost](https://inverseprobability.com/talks/notes/deep-gps.html)." + ] }, { "cell_type": "markdown", + "id": "83ce6ea5", + "metadata": {}, "source": [ "### Latent Variable Layer\n", "\n", "This layer concatenates the inputs with a latent variable. See Dutordoir, Salimbeni et al. Conditional Density with Gaussian processes (2018) for full details. We choose a one-dimensional input and a full parameterisation for the latent variables. This means that we do not need to train a recognition network, which is useful for fitting but can only be done in the case of small datasets, as is the case here." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, + "id": "8b6cf7b2", + "metadata": {}, + "outputs": [], "source": [ "w_dim = 1\n", "prior_means = np.zeros(w_dim)\n", @@ -257,32 +207,34 @@ "encoder = gpflux.encoders.DirectlyParameterizedNormalDiag(num_data, w_dim)\n", "prior = tfp.distributions.MultivariateNormalDiag(prior_means, prior_std)\n", "lv = gpflux.layers.LatentVariableLayer(prior, encoder)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "id": "121b7798", + "metadata": {}, "source": [ "### First GP layer\n", "\n", "GP Layer with two dimensional input because it acts on the inputs and the one-dimensional latent variable. We use a Squared Exponential kernel, a zero mean function, and inducing points, whose pseudo input locations are carefully chosen." 
- ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, + "id": "3eb1860e", + "metadata": {}, + "outputs": [], "source": [ "\n", - "kernel = gpflow.kernels.SquaredExponential(lengthscales=[.05, .2], variance=1.)\n", + "kernel = gpflow.kernels.SquaredExponential(lengthscales=[0.05, 0.2], variance=1.0)\n", "inducing_variable = gpflow.inducing_variables.InducingPoints(\n", " np.concatenate(\n", " [\n", " np.linspace(X.min(), X.max(), NUM_INDUCING).reshape(-1, 1),\n", " np.random.randn(NUM_INDUCING, 1),\n", " ],\n", - " axis=1\n", + " axis=1,\n", " )\n", ")\n", "gp_layer = gpflux.layers.GPLayer(\n", @@ -292,22 +244,24 @@ " num_latent_gps=1,\n", " mean_function=gpflow.mean_functions.Zero(),\n", ")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "id": "b93fe2e1", + "metadata": {}, "source": [ "### Second GP layer\n", "\n", "Final layer GP with Squared Exponential kernel" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, + "id": "226ef693", + "metadata": {}, + "outputs": [], "source": [ "\n", "kernel = gpflow.kernels.SquaredExponential()\n", @@ -321,151 +275,82 @@ " num_latent_gps=1,\n", " mean_function=gpflow.mean_functions.Identity(),\n", ")\n", - "gp_layer2.q_sqrt.assign(gp_layer.q_sqrt * 1e-5);" - ], - "outputs": [], - "metadata": {} + "gp_layer2.q_sqrt.assign(gp_layer.q_sqrt * 1e-5)" + ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, + "id": "1c383ad3", + "metadata": {}, + "outputs": [], "source": [ "\n", "likelihood_layer = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian(0.01))\n", "gpflow.set_trainable(likelihood_layer, False)\n", "dgp = gpflux.models.DeepGP([lv, gp_layer, gp_layer2], likelihood_layer)\n", "gpflow.utilities.print_summary(dgp, fmt=\"notebook\")" - ], - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - "\n", - "\n", - 
"\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
name class transform prior trainable shape dtype value
DeepGP.f_layers[0]._layers[0].means\n", - "DeepGP.f_layers[0].encoder.means ParameterIdentity True (94, 1) float64[[2.01673752e-02...
DeepGP.f_layers[0]._layers[0].stds\n", - "DeepGP.f_layers[0].encoder.stds ParameterSoftplus True (94, 1) float64[[1.e-05...
DeepGP.f_layers[1].kernel.variance ParameterSoftplus True () float641.0
DeepGP.f_layers[1].kernel.lengthscales ParameterSoftplus True (2,) float64[0.05 0.2 ]
DeepGP.f_layers[1].inducing_variable.Z ParameterIdentity True (20, 2) float64[[0.04166667, 0.66191201...
DeepGP.f_layers[1].q_mu ParameterIdentity True (20, 1) float64[[0....
DeepGP.f_layers[1].q_sqrt ParameterFillTriangular True (1, 20, 20)float64[[[1., 0., 0....
DeepGP.f_layers[2].kernel.variance ParameterSoftplus True () float641.0
DeepGP.f_layers[2].kernel.lengthscales ParameterSoftplus True () float641.0
DeepGP.f_layers[2].inducing_variable.Z ParameterIdentity True (20, 1) float64[[2.48779388...
DeepGP.f_layers[2].q_mu ParameterIdentity True (20, 1) float64[[0....
DeepGP.f_layers[2].q_sqrt ParameterFillTriangular True (1, 20, 20)float64[[[1.e-05, 0.e+00, 0.e+00...
DeepGP.likelihood_layer.likelihood.varianceParameterSoftplus + Shift False () float640.009999999999999998
" - ] - }, - "metadata": {} - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "id": "36132b7d", + "metadata": {}, "source": [ "### Fit\n", "\n", "We can now fit the model. Because of the `DirectlyParameterizedEncoder` it is important to set the batch size to the number of datapoints and turn off shuffle. This is so that we use the associated latent variable for each datapoint. If we would use an amortized encoder network this would not be necessary." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, + "id": "9f21ce5a", + "metadata": {}, + "outputs": [], "source": [ "model = dgp.as_training_model()\n", "model.compile(tf.optimizers.Adam(0.005))\n", - "history = model.fit({\"inputs\": X, \"targets\": Y}, epochs=int(20e3), verbose=0, batch_size=num_data, shuffle=False)" - ], - "outputs": [], - "metadata": {} + "history = model.fit(\n", + " {\"inputs\": X, \"targets\": Y}, epochs=int(20e3), verbose=0, batch_size=num_data, shuffle=False\n", + ")" + ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, + "id": "128f730a", + "metadata": {}, + "outputs": [], "source": [ "gpflow.utilities.print_summary(dgp, fmt=\"notebook\")" - ], - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
name class transform prior trainable shape dtype value
DeepGP.f_layers[0]._metrics[0]._non_trainable_weights[0]\n", - "DeepGP.f_layers[0]._metrics[0].total ResourceVariable False () float641.0780249257681602
DeepGP.f_layers[0]._metrics[0]._non_trainable_weights[1]\n", - "DeepGP.f_layers[0]._metrics[0].count ResourceVariable False () float641.0
DeepGP.f_layers[0]._layers[0].means\n", - "DeepGP.f_layers[0].encoder.means Parameter Identity True (94, 1) float64[[0.03641934...
DeepGP.f_layers[0]._layers[0].stds\n", - "DeepGP.f_layers[0].encoder.stds Parameter Softplus True (94, 1) float64[[0.8204074...
DeepGP.f_layers[1]._metrics[0]._non_trainable_weights[0]\n", - "DeepGP.f_layers[1]._metrics[0].total ResourceVariable False () float640.4804222860958632
DeepGP.f_layers[1]._metrics[0]._non_trainable_weights[1]\n", - "DeepGP.f_layers[1]._metrics[0].count ResourceVariable False () float641.0
DeepGP.f_layers[1].kernel.variance Parameter Softplus True () float640.8957803239319818
DeepGP.f_layers[1].kernel.lengthscales Parameter Softplus True (2,) float64[0.13181667 4.24491404]
DeepGP.f_layers[1].inducing_variable.Z Parameter Identity True (20, 2) float64[[0.10892382, -0.15883231...
DeepGP.f_layers[1].q_mu Parameter Identity True (20, 1) float64[[3.44379760e-02...
DeepGP.f_layers[1].q_sqrt Parameter FillTriangular True (1, 20, 20)float64[[[8.69748551e-02, 0.00000000e+00, 0.00000000e+00...
DeepGP.f_layers[2]._metrics[0]._non_trainable_weights[0]\n", - "DeepGP.f_layers[2]._metrics[0].total ResourceVariable False () float640.14355680909786334
DeepGP.f_layers[2]._metrics[0]._non_trainable_weights[1]\n", - "DeepGP.f_layers[2]._metrics[0].count ResourceVariable False () float641.0
DeepGP.f_layers[2].kernel.variance Parameter Softplus True () float640.10763915659754926
DeepGP.f_layers[2].kernel.lengthscales Parameter Softplus True () float640.7436233151733747
DeepGP.f_layers[2].inducing_variable.Z Parameter Identity True (20, 1) float64[[3.28383233e+00...
DeepGP.f_layers[2].q_mu Parameter Identity True (20, 1) float64[[-0.25372763...
DeepGP.f_layers[2].q_sqrt Parameter FillTriangular True (1, 20, 20)float64[[[9.55995241e-01, 0.00000000e+00, 0.00000000e+00...
DeepGP.likelihood_layer.likelihood.varianceParameter Softplus + Shift False () float640.009999999999999998
" - ] - }, - "metadata": {} - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "id": "19f6a9f1", + "metadata": {}, "source": [ "### Prediction and plotting code" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, + "id": "d6359dee", + "metadata": {}, + "outputs": [], "source": [ "Xs = np.linspace(X.min() - X_MARGIN, X.max() + X_MARGIN, num_data_test).reshape(-1, 1)\n", "\n", + "\n", "def predict_y_samples(prediction_model, Xs, num_samples=25):\n", " samples = []\n", " for i in tqdm(range(num_samples)):\n", " out = prediction_model(Xs)\n", - " s = out.y_mean + out.y_var ** .5 * tf.random.normal(tf.shape(out.y_mean), dtype=out.y_mean.dtype)\n", + " s = out.y_mean + out.y_var ** 0.5 * tf.random.normal(\n", + " tf.shape(out.y_mean), dtype=out.y_mean.dtype\n", + " )\n", " samples.append(s)\n", " return tf.concat(samples, axis=1)\n", "\n", @@ -473,10 +358,10 @@ "def plot_samples(ax, N_samples=25):\n", " samples = predict_y_samples(dgp.as_prediction_model(), Xs, N_samples).numpy().T\n", " Xs_tiled = np.tile(Xs, [N_samples, 1])\n", - " ax.scatter(Xs_tiled.flatten(), samples.flatten(), marker='.', alpha=0.2, color='C0')\n", + " ax.scatter(Xs_tiled.flatten(), samples.flatten(), marker=\".\", alpha=0.2, color=\"C0\")\n", " ax.set_ylim(-2.5, 2.5)\n", " ax.set_xlim(min(Xs), max(Xs))\n", - " ax.scatter(X, Y, marker='.', color='C1')\n", + " ax.scatter(X, Y, marker=\".\", color=\"C1\")\n", "\n", "\n", "def plot_latent_variables(ax):\n", @@ -484,58 +369,46 @@ " if isinstance(l, gpflux.layers.LatentVariableLayer):\n", " m = l.encoder.means.numpy()\n", " s = l.encoder.stds.numpy()\n", - " ax.errorbar(X.flatten(), m.flatten(), yerr=s.flatten(), fmt='o')\n", - " return\n" - ], - "outputs": [], - "metadata": {} + " ax.errorbar(X.flatten(), m.flatten(), yerr=s.flatten(), fmt=\"o\")\n", + " return" + ] }, { "cell_type": "markdown", - "source": [], - "metadata": {} + "id": "27137aaf", + "metadata": {}, + "source": [] }, { 
"cell_type": "code", - "execution_count": 20, + "execution_count": null, + "id": "ac5c63a6", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], "source": [ "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))\n", "plot_samples(ax1)\n", "plot_latent_variables(ax2)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "100%|██████████| 25/25 [00:01<00:00, 24.88it/s]\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "id": "ed058c5d", + "metadata": { + "lines_to_next_cell": 2 + }, "source": [ "Left we show the dataset and posterior samples of $y$. On the right we plot the mean and std. deviation of the latent variables corresponding to the datapoints." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, + "id": "fb6e4233", + "metadata": {}, + "outputs": [], "source": [ "def plot_mean_and_var(ax, samples=None, N_samples=5_000):\n", " if samples is None:\n", @@ -553,50 +426,32 @@ " ax.set_ylim(Y.min() - Y_MARGIN, Y.max() + Y_MARGIN)\n", " ax.set_xlabel(\"time\")\n", " ax.set_ylabel(\"acceleration\")\n", - " return samples\n" - ], - "outputs": [], - "metadata": {} + " return samples" + ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, + "id": "3b7576bc", + "metadata": {}, + "outputs": [], "source": [ "fig, ax = plt.subplots()\n", - "plot_mean_and_var(ax);" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "100%|██████████| 5000/5000 [03:24<00:00, 24.44it/s]\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "metadata": {} + "plot_mean_and_var(ax)" + ] }, { "cell_type": "markdown", + "id": "efb2db26", + "metadata": {}, "source": [ "The deep GP model can handle the heteroscedastic noise in the dataset as well as the sharp-ish transition at $0.3$." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "id": "9dd1cf83", + "metadata": {}, "source": [ "## Conclusion\n", "\n", @@ -604,37 +459,22 @@ "\n", "\n", "[1] Silverman, B. W. (1985) “Some aspects of the spline smoothing approach to non-parametric curve fitting”. Journal of the Royal Statistical Society series B 47, 1-52." - ], - "metadata": {} + ] }, { "cell_type": "markdown", - "source": [], - "metadata": {} + "id": "c252fac0", + "metadata": {}, + "source": [] } ], "metadata": { - "orig_nbformat": 4, - "language_info": { - "name": "python", - "version": "3.7.4", - "mimetype": "text/x-python", - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "pygments_lexer": "ipython3", - "nbconvert_exporter": "python", - "file_extension": ".py" - }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.7.4 64-bit ('gpflow2': conda)" - }, - "interpreter": { - "hash": "f5f45e34fe652d34c37da17ab3d28157e258abad05dd1da4f29284863e79e5cf" + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 5 } diff --git a/docs/notebooks/gpflux_features.py b/docs/notebooks/gpflux_features.py index 909ea7fd..7c2b0cac 100644 --- a/docs/notebooks/gpflux_features.py +++ b/docs/notebooks/gpflux_features.py @@ -61,15 +61,22 @@ def motorcycle_data(): """ # %% -import gpflux +from gpflow.kernels import SquaredExponential -from gpflux.architectures import Config, build_constant_input_dim_deep_gp +import gpflux +from gpflux.architectures.config import GaussianLikelihoodConfig, ModelHyperParametersConfig +from gpflux.architectures.factory import 
build_constant_input_dim_architecture from gpflux.models import DeepGP -config = Config( - num_inducing=25, inner_layer_qsqrt_factor=1e-5, likelihood_noise_variance=1e-2, whiten=True +config = ModelHyperParametersConfig( + num_layers=2, + kernel=SquaredExponential, + likelihood=GaussianLikelihoodConfig(noise_variance=1e-2), + inner_layer_qsqrt_factor=1e-5, + whiten=True, + num_inducing=25, ) -deep_gp: DeepGP = build_constant_input_dim_deep_gp(X, num_layers=2, config=config) +deep_gp: DeepGP = build_constant_input_dim_architecture(config, X) # %% [markdown] """ @@ -164,9 +171,7 @@ def plot(model, X, Y, ax=None): prediction_model.save_weights("weights") # %% -prediction_model_new = build_constant_input_dim_deep_gp( - X, num_layers=2, config=config -).as_prediction_model() +prediction_model_new = build_constant_input_dim_architecture(config, X).as_prediction_model() prediction_model_new.load_weights("weights") # %% diff --git a/docs/notebooks/plotting_functions.py b/docs/notebooks/plotting_functions.py new file mode 100644 index 00000000..f7a03694 --- /dev/null +++ b/docs/notebooks/plotting_functions.py @@ -0,0 +1,195 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" +This module contains experimental code for plotting. As such, the disclaimer is clear. 
+""" +import numpy as np +import matplotlib.pyplot as plt +import tensorflow as tf +import io + + +def get_classification_detailed_plot( + num_layers, X_training, Y_training, where_to_save, f_mean_overall, f_var_overall, name_file +): + + xx, yy = np.mgrid[-5:5:0.1, -5:5:0.1] + grid = np.c_[xx.ravel(), yy.ravel()] + grid = grid.astype(np.float32) + + indices_class_1 = np.where(Y_training == 1.0) + indices_class_0 = np.where(Y_training == 0.0) + + fig, axs = plt.subplots( + nrows=2, ncols=num_layers, sharex=True, sharey=True, figsize=(20 * num_layers, 40), squeeze=False + ) + + for current_layer in range(num_layers): + + current_mean = f_mean_overall[current_layer] + current_mean = current_mean.reshape((100, 100)) + current_var = f_var_overall[current_layer] + current_var = current_var.reshape((100, 100)) + + ################### + ##### F mean ##### + ################### + + axis = axs[0, current_layer] + contour = axis.contourf(xx, yy, current_mean, 50, cmap="coolwarm") + cbar1 = fig.colorbar(contour, ax=axis) + + cbar1.ax.tick_params(labelsize=60) + + axis.set(xlim=(-5.0, 5.0), ylim=(-5.0, 5.0), xlabel="$X_1$", ylabel="$X_2$") + axis.set_title(label="Predictive Mean", fontdict={"fontsize": 60}) + axis.tick_params(axis="both", which="major", labelsize=80) + + axis.scatter( + X_training[indices_class_0, 0], + X_training[indices_class_0, 1], + s=100, + marker="X", + alpha=0.2, + c="green", + linewidth=1, + label="Class 0", + ) + axis.scatter( + X_training[indices_class_1, 0], + X_training[indices_class_1, 1], + s=100, + marker="D", + alpha=0.2, + c="purple", + linewidth=1, + label="Class 1", + ) + + # NOTE -- don't need this + # axis.scatter(Z_np[current_layer][:,0], Z_np[current_layer][:,1], + # s=750, marker="*", alpha=0.95, c = 'cyan', + # linewidth=1, label = 'Inducing Points') + + axis.legend(loc="upper right", prop={"size": 60}) + # axis.text(-4.5, 4.5, 'LL:'+"{:.2f}".format(total_nll_np)+'; Acc:'+"{:.2f}".format(precision_testing_overall_np), size=50, color='black') + + 
################################# + ##### F var Distributional ##### + ################################# + + axis = axs[1, current_layer] + contour = axis.contourf(xx, yy, current_var, 50, cmap="coolwarm") + cbar1 = fig.colorbar(contour, ax=axis) + cbar1.ax.tick_params(labelsize=60) + + axis.set(xlim=(-5, 5), ylim=(-5, 5), xlabel="$X_1$", ylabel="$X_2$") + + axis.set_title(label="Predictive Variance", fontdict={"fontsize": 60}) + axis.tick_params(axis="both", which="major", labelsize=80) + + axis.scatter( + X_training[indices_class_0, 0], + X_training[indices_class_0, 1], + s=100, + marker="X", + alpha=0.2, + c="green", + linewidth=1, + label="Class 0", + ) + axis.scatter( + X_training[indices_class_1, 0], + X_training[indices_class_1, 1], + s=100, + marker="D", + alpha=0.2, + c="purple", + linewidth=1, + label="Class 1", + ) + + # axis.scatter(Z_np[current_layer][:,0], Z_np[current_layer][:,1], + # s=750, marker="*", alpha=0.95, c = 'cyan', + # linewidth=1, label = 'Inducing Points') + axis.legend(loc="upper right", prop={"size": 60}) + + plt.tight_layout() + plt.savefig(where_to_save + name_file) + plt.close() + + +def get_regression_detailed_plot( + num_layers, + X_training, + Y_training, + where_to_save, + mean, + var, + name_file, + x_margin, + y_margin, + X_test, +): + + figure, axs = plt.subplots( + nrows=1, ncols=num_layers, sharex=True, sharey=True, figsize=(10 * num_layers, 10), squeeze=False + ) + + for current_layer in range(num_layers): + current_mean = mean[current_layer] + current_var = var[current_layer] + + ################### + ##### F mean ##### + ################### + + X_test = X_test.squeeze() + lower = current_mean - 2 * np.sqrt(current_var) + upper = current_mean + 2 * np.sqrt(current_var) + + axis = axs[0, current_layer] + axis.set_ylim(Y_training.min() - y_margin, Y_training.max() + y_margin) + axis.plot(X_training, Y_training, "kx", alpha=0.5, label="Training") + axis.plot(X_test, current_mean, "C1") + + axis.fill_between(X_test, lower, upper, color="C1", 
alpha=0.3) + axis.legend(loc="upper right", prop={"size": 60}) + + axis.set_title(label=f"Layer {current_layer+1}", fontdict={"fontsize": 60}) + axis.tick_params(axis="both", which="major", labelsize=80) + + plt.tight_layout() + # plt.savefig(where_to_save+name_file) + # plt.close() + return figure + + +def plot_to_image(figure): + """Converts the matplotlib plot specified by 'figure' to a PNG image and + returns it. The supplied figure is closed and inaccessible after this call.""" + # Save the supplied figure (not whichever figure pyplot considers current) to a PNG in memory. + buf = io.BytesIO() + figure.savefig(buf, format="png") + # Closing the figure prevents it from being displayed directly inside + # the notebook. + plt.close(figure) + buf.seek(0) + # Convert PNG buffer to TF image + image = tf.image.decode_png(buf.getvalue(), channels=4) + # Add the batch dimension + image = tf.expand_dims(image, 0) + return image diff --git a/docs/notebooks/sparse_orthogonal_deep_gps.py b/docs/notebooks/sparse_orthogonal_deep_gps.py new file mode 100644 index 00000000..8032f371 --- /dev/null +++ b/docs/notebooks/sparse_orthogonal_deep_gps.py @@ -0,0 +1,240 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.14.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# # Sparse Orthogonal Variational Inference for Deep Gaussian Processes +# +# In this notebook, we explore the use of a new interpretation of sparse variational approximations for Gaussian processes using inducing points, which can lead to more scalable algorithms than previous methods. It is based on decomposing a Gaussian process as a sum of two independent processes: one spanned by a finite basis of inducing points and the other capturing the remaining variation . +# +# Sparse orthogonal VI is based on decomposing the GP prior as the sum of a low-rank approximation using inducing points, and a full-rank residual process. 
It has been observed that the standard SVGP methods can be reinterpreted under such a decomposition. By introducing another set of inducing variables for the orthogonal complement, we can increase the number of inducing points at a much lower additional computational cost. + +import gpflow +import gpflux +import numpy as np +import matplotlib.pyplot as plt +import tensorflow as tf + +# ## Load data +# +# The data comes from a motorcycle accident simulation [1] and shows some interesting behaviour, in particular the heteroscedastic nature of the noise. + + +def motorcycle_data(): + """Return inputs and outputs for the motorcycle dataset. We normalise the outputs.""" + import pandas as pd + + df = pd.read_csv("./data/motor.csv", index_col=0) + X, Y = df["times"].values.reshape(-1, 1), df["accel"].values.reshape(-1, 1) + Y = (Y - Y.mean()) / Y.std() + X /= X.max() + return X, Y + + +# + +X, Y = motorcycle_data() +num_data, d_xim = X.shape + +X_MARGIN, Y_MARGIN = 0.1, 0.5 +fig, ax = plt.subplots() +ax.scatter(X, Y, marker="x", color="k") +ax.set_ylim(Y.min() - Y_MARGIN, Y.max() + Y_MARGIN) +ax.set_xlim(X.min() - X_MARGIN, X.max() + X_MARGIN) +# - + +# ## Orthogonal Deep Gaussian process +# +# GPflux provides a class `OrthGPLayer`, which implements a Sparse Orthogonal Variational multioutput Gaussian Process as a `tf.keras.layers.Layer`. In the following, we build a 2-layer orthogonal deep GP model using this new layer type, with a Gaussian likelihood in the output layer. A standard squared exponential kernel is used throughout the layers. 
+ +# + +from typing import Type, cast +from gpflow.kernels import SquaredExponential, Stationary +from gpflow.mean_functions import Zero +from gpflow.likelihoods import Gaussian +from scipy.cluster.vq import kmeans2 + +from gpflux.helpers import ( + construct_basic_inducing_variables, + construct_basic_kernel, + construct_mean_function, +) +from gpflux.layers import OrthGPLayer +from gpflux.layers.likelihood_layer import LikelihoodLayer +from gpflux.models import OrthDeepGP + + +def build_kernel(input_dim: int, is_last_layer: bool, kernel: Type[Stationary]) -> Stationary: + """ + Return a :class:`gpflow.kernels.Stationary` kernel with ARD lengthscales set to + 1.0 and a small kernel variance of 1e-6 if the kernel is part of a hidden layer; + otherwise, the kernel variance is set to 1.0. + + :param input_dim: The input dimensionality of the layer. + :param is_last_layer: Whether the kernel is part of the last layer in the Deep GP. + :param kernel: the :class:`~gpflow.kernels.Stationary` type of the kernel + """ + assert input_dim > 0, "Cannot have non positive input dimension" + + variance = 1e-6 if not is_last_layer else 1.0 + lengthscales = [1.0] * input_dim + + return kernel(lengthscales=lengthscales, variance=variance) + + +def build_orthogonal_deep_gp( + num_layers: int, num_inducing_u: int, num_inducing_v: int, X: np.ndarray +) -> OrthDeepGP: + """ + :param num_layers: the number of (hidden) layers + :param num_inducing_u: The number of inducing points to use for the low-rank approximation + :param num_inducing_v: The number of inducing points to use for the full-rank residual process + :param X: the data + """ + num_data, input_dim = X.shape + X_running = X + + gp_layers = [] + centroids, _ = kmeans2(X, k=min(num_inducing_u + num_inducing_v, X.shape[0]), minit="points") + + centroids_u = centroids[:num_inducing_u, ...] + centroids_v = centroids[num_inducing_u:, ...] 
+ + for i_layer in range(num_layers): + is_last_layer = i_layer == num_layers - 1 + D_in = input_dim + D_out = 1 if is_last_layer else input_dim + + inducing_var_u = construct_basic_inducing_variables( + num_inducing=num_inducing_u, + input_dim=D_in, + share_variables=True, + z_init=centroids_u, + ) + + inducing_var_v = construct_basic_inducing_variables( + num_inducing=num_inducing_v, + input_dim=D_in, + share_variables=True, + z_init=centroids_v, + ) + + kernel = construct_basic_kernel( + kernels=build_kernel(D_in, is_last_layer, SquaredExponential), + output_dim=D_out, + share_hyperparams=True, + ) + + if is_last_layer: + mean_function = Zero() + q_sqrt_scaling = 1.0 + else: + mean_function = construct_mean_function(X_running, D_out) + X_running = mean_function(X_running) + if tf.is_tensor(X_running): + X_running = cast(tf.Tensor, X_running).numpy() + q_sqrt_scaling = 1e-5 + + # NOTE: here we're using the specialised GPLayer + layer = OrthGPLayer( + kernel, + inducing_var_u, + inducing_var_v, + num_data, + mean_function=mean_function, + name=f"orth_gp_{i_layer}", + num_latent_gps=D_out, + ) + layer.q_sqrt_u.assign(layer.q_sqrt_u * q_sqrt_scaling) + layer.q_sqrt_v.assign(layer.q_sqrt_v * q_sqrt_scaling) + gp_layers.append(layer) + + # NOTE: here we return an instance of a DeepGP type specialised for sparse orthogonal VI + return OrthDeepGP(gp_layers, LikelihoodLayer(likelihood=Gaussian(variance=1e-2))) + + +# - + +# ### Create the model +# +# We now instantiate one model using the above utility function. Note how we can use substantially more inducing points compared to the models defined in other notebooks, for both the low-rank approximation and the full-rank residual process. 
+ +# + + +orthogonal_dgp = build_orthogonal_deep_gp(num_layers=1, num_inducing_u=50, num_inducing_v=50, X=X) +gpflow.utilities.print_summary(orthogonal_dgp, fmt="notebook") +# - + +# ### Model training + +# + +# Fit the model on the training data + +BATCH_SIZE = 32 +NUM_EPOCHS = 1000 + +model = orthogonal_dgp.as_training_model() +model.compile(tf.optimizers.Adam(5e-2)) + + +callbacks = [ + # Create callback that reduces the learning rate every time the ELBO plateaus + tf.keras.callbacks.ReduceLROnPlateau("loss", factor=0.95, patience=10, min_lr=1e-6, verbose=0) +] +history = model.fit( + {"inputs": X, "targets": Y}, + batch_size=BATCH_SIZE, + epochs=NUM_EPOCHS, + callbacks=callbacks, + verbose=0, +) +gpflow.utilities.print_summary(orthogonal_dgp, fmt="notebook") +# - + +fig, ax = plt.subplots() +ax.plot(history.history["loss"]) +ax.set_xlabel("Epoch") +ax.set_ylabel("Loss") + +# + +fig, ax = plt.subplots() +num_data_test = 200 +X_test = np.linspace(X.min() - X_MARGIN, X.max() + X_MARGIN, num_data_test).reshape(-1, 1) +model = orthogonal_dgp.as_prediction_model() +out = model(X_test) + +mu = out.y_mean.numpy().squeeze() +var = out.y_var.numpy().squeeze() +X_test = X_test.squeeze() + +for i in [1, 2]: + lower = mu - i * np.sqrt(var) + upper = mu + i * np.sqrt(var) + ax.fill_between(X_test, lower, upper, color="C1", alpha=0.3) + +ax.set_ylim(Y.min() - Y_MARGIN, Y.max() + Y_MARGIN) +ax.set_xlim(X.min() - X_MARGIN, X.max() + X_MARGIN) +ax.plot(X, Y, "kx", alpha=0.5) +ax.plot(X_test, mu, "C1") +ax.set_xlabel("time") +ax.set_ylabel("acc") +# - + +# ## Conclusion +# +# In this notebook we have shown how to create a variant of the deep GP model using the recently introduced sparse orthogonal variational inference of Gaussian processes in GPflux. +# +# +# ## References +# +# [1] Shi, J. et al. (2020) “Sparse Orthogonal Variational Inference for Gaussian Processes”. Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS), PMLR 108. 
+ +# diff --git a/docs/refs.bib b/docs/refs.bib index 824f8d65..e9c611ba 100644 --- a/docs/refs.bib +++ b/docs/refs.bib @@ -83,3 +83,14 @@ @article{yu2016orthogonal pages={1975--1983}, year={2016} } + +@inproceedings{shi2020sparseorthogonal, + title = {Sparse Orthogonal Variational Inference for Gaussian Processes}, + author = {Shi, Jiaxin and Titsias, Michalis K. and Mnih, Andriy}, + booktitle = {Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)}, + year = {2020}, + volume = {108}, + series = {Proceedings of Machine Learning Research}, + publisher = {PMLR}, + url = {http://proceedings.mlr.press/v108/shi20b/shi20b.pdf} +} diff --git a/docs/tutorials.rst b/docs/tutorials.rst index fd348eff..a0d51063 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -8,6 +8,7 @@ Tutorials notebooks/intro notebooks/gpflux_features notebooks/deep_cde + notebooks/sparse_orthogonal_deep_gps .. toctree:: :caption: Advanced diff --git a/gpflux/architectures/__init__.py b/gpflux/architectures/__init__.py index d2ab53fe..e69de29b 100644 --- a/gpflux/architectures/__init__.py +++ b/gpflux/architectures/__init__.py @@ -1,4 +0,0 @@ -""" -Pre-specified architectures -""" -from gpflux.architectures.constant_input_dim_deep_gp import Config, build_constant_input_dim_deep_gp diff --git a/gpflux/architectures/config.py b/gpflux/architectures/config.py new file mode 100644 index 00000000..d9b9ee89 --- /dev/null +++ b/gpflux/architectures/config.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""This module declares configurations for building various types of architectures""" +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Type + +import tensorflow_probability as tfp + +import gpflow.likelihoods +from gpflow.kernels import Stationary + + +class LikelihoodConfig(ABC): + """Config for the model likelihood""" + + @abstractmethod + def create(self) -> gpflow.likelihoods.Likelihood: + """Create a likelihood instance with the parameters of the config""" + + +@dataclass +class GaussianLikelihoodConfig(LikelihoodConfig): + """Config for a :class:`~gpflow.likelihoods.Gaussian` likelihood""" + + noise_variance: float + """The variance of the likelihood""" + + def create(self) -> gpflow.likelihoods.Gaussian: + return gpflow.likelihoods.Gaussian(variance=self.noise_variance) + + +@dataclass +class StudenttLikelihoodConfig(LikelihoodConfig): + """Config for a :class:`~gpflow.likelihoods.StudentT` likelihood""" + + df: float + """The number of degrees of freedom""" + + scale: float + """The scale parameter""" + + def create(self) -> gpflow.likelihoods.StudentT: + return gpflow.likelihoods.StudentT(df=self.df, scale=self.scale) + + +@dataclass +class HeteroSkedasticLikelihoodConfig(LikelihoodConfig): + """Configuration for a :class:`~gpflow.likelihoods.HeteroskedasticTFPConditional`""" + + distribution_class: Type[tfp.distributions.Distribution] = tfp.distributions.Normal + """The distribution class""" + + def create(self) -> gpflow.likelihoods.HeteroskedasticTFPConditional: + return 
gpflow.likelihoods.HeteroskedasticTFPConditional( + distribution_class=self.distribution_class + ) + + +@dataclass +class HyperParametersConfig(ABC): + """Configuration of the hyperparameters of a model""" + + num_layers: int + """The number of GP layers in the model, excluded the likelihood one""" + + kernel: Type[Stationary] + """The (stationary) kernel to use in the layers""" + + likelihood: LikelihoodConfig + """Configuration for the model likelihood""" + + inner_layer_qsqrt_factor: float + """ + A multiplicative factor used to rescale the hidden layers' + :attr:`~gpflux.layers.GPLayer.q_sqrt`. Typically this value is chosen to be small + (e.g., 1e-5) to reduce noise at the start of training. + """ + + whiten: bool + """ + Determines the parameterisation of the inducing variables. + If `True`, :math:``p(u) = N(0, I)``, otherwise :math:``p(u) = N(0, K_{uu})``. + .. seealso:: :attr:`gpflux.layers.GPLayer.whiten` + """ + + def __post_init__(self) -> None: + assert self.num_layers > 0, "Cannot have non-positive number of layers" + assert self.whiten, "Non-whitened case not yet supported" + + +@dataclass +class ModelHyperParametersConfig(HyperParametersConfig): + """The configuration used to build a DeepGP model""" + + num_inducing: int + """ + The number of inducing variables, *M*. The Deep GP uses the same number + of inducing variables in each layer. + """ + + +@dataclass +class OrthogonalModelHyperparametersConfig(HyperParametersConfig): + """The configuration used to build an OrthDeepGP model""" + + num_inducing_u: int + """ + The number of inducing variables for the `main` process. The Deep GP uses the same number + of inducing variables in each layer. + """ + + num_inducing_v: int + """ + The number of inducing variables for the process capturing the remaining variation. + The Deep GP uses the same number of inducing variables in each layer. 
+ """ diff --git a/gpflux/architectures/constant_input_dim_deep_gp.py b/gpflux/architectures/constant_input_dim_deep_gp.py deleted file mode 100644 index 50c07418..00000000 --- a/gpflux/architectures/constant_input_dim_deep_gp.py +++ /dev/null @@ -1,168 +0,0 @@ -# -# Copyright (c) 2021 The GPflux Contributors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" -This module provides :func:`build_constant_input_dim_deep_gp` to build a Deep GP of -arbitrary depth where each hidden layer has the same input dimensionality as the data. -""" - -from dataclasses import dataclass -from typing import cast - -import numpy as np -import tensorflow as tf -from scipy.cluster.vq import kmeans2 - -import gpflow -from gpflow.kernels import SquaredExponential -from gpflow.likelihoods import Gaussian - -from gpflux.helpers import ( - construct_basic_inducing_variables, - construct_basic_kernel, - construct_mean_function, -) -from gpflux.layers.gp_layer import GPLayer -from gpflux.layers.likelihood_layer import LikelihoodLayer -from gpflux.models import DeepGP - - -@dataclass -class Config: - """ - The configuration used by :func:`build_constant_input_dim_deep_gp`. - """ - - num_inducing: int - """ - The number of inducing variables, *M*. The Deep GP uses the same number - of inducing variables in each layer. - """ - - inner_layer_qsqrt_factor: float - """ - A multiplicative factor used to rescale the hidden layers' - :attr:`~gpflux.layers.GPLayer.q_sqrt`. 
Typically this value is chosen to be small - (e.g., 1e-5) to reduce noise at the start of training. - """ - - likelihood_noise_variance: float - """ - The variance of the :class:`~gpflow.likelihoods.Gaussian` likelihood that is used - by the Deep GP. - """ - - whiten: bool = True - """ - Determines the parameterisation of the inducing variables. - If `True`, :math:``p(u) = N(0, I)``, otherwise :math:``p(u) = N(0, K_{uu})``. - .. seealso:: :attr:`gpflux.layers.GPLayer.whiten` - """ - - -def _construct_kernel(input_dim: int, is_last_layer: bool) -> SquaredExponential: - """ - Return a :class:`gpflow.kernels.SquaredExponential` kernel with ARD lengthscales set to - 2 and a small kernel variance of 1e-6 if the kernel is part of a hidden layer; - otherwise, the kernel variance is set to 1.0. - - :param input_dim: The input dimensionality of the layer. - :param is_last_layer: Whether the kernel is part of the last layer in the Deep GP. - """ - variance = 1e-6 if not is_last_layer else 1.0 - - # TODO: Looking at this initializing to 2 (assuming N(0, 1) or U[0,1] normalized - # data) seems a bit weird - that's really long lengthscales? And I remember seeing - # something where the value scaled with the number of dimensions before - lengthscales = [2.0] * input_dim - return SquaredExponential(lengthscales=lengthscales, variance=variance) - - -def build_constant_input_dim_deep_gp(X: np.ndarray, num_layers: int, config: Config) -> DeepGP: - r""" - Build a Deep GP consisting of ``num_layers`` :class:`GPLayer`\ s. - All the hidden layers have the same input dimension as the data, that is, ``X.shape[1]``. - - The architecture is largely based on :cite:t:`salimbeni2017doubly`, with - the most notable difference being that we keep the hidden dimension equal - to the input dimensionality of the data. - - .. note:: - This architecture might be slow for high-dimensional data. - - .. 
note:: - This architecture assumes a :class:`~gpflow.likelihoods.Gaussian` likelihood - for regression tasks. Specify a different likelihood for performing - other tasks such as classification. - - :param X: The training input data, used to retrieve the number of datapoints and - the input dimension and to initialise the inducing point locations using k-means. A - tensor of rank two with the dimensions ``[num_data, input_dim]``. - :param num_layers: The number of layers in the Deep GP. - :param config: The configuration for (hyper)parameters. See :class:`Config` for details. - """ - if X.dtype != gpflow.default_float(): - raise ValueError( - f"X needs to have dtype according to gpflow.default_float() = {gpflow.default_float()} " - f"however got X with {X.dtype} dtype." - ) - - num_data, input_dim = X.shape - X_running = X - - gp_layers = [] - centroids, _ = kmeans2(X, k=config.num_inducing, minit="points") - - for i_layer in range(num_layers): - is_last_layer = i_layer == num_layers - 1 - D_in = input_dim - D_out = 1 if is_last_layer else input_dim - - # Pass in kernels, specify output dim (shared hyperparams/variables) - - inducing_var = construct_basic_inducing_variables( - num_inducing=config.num_inducing, input_dim=D_in, share_variables=True, z_init=centroids - ) - - kernel = construct_basic_kernel( - kernels=_construct_kernel(D_in, is_last_layer), - output_dim=D_out, - share_hyperparams=True, - ) - - assert config.whiten is True, "non-whitened case not implemented yet" - - if is_last_layer: - mean_function = gpflow.mean_functions.Zero() - q_sqrt_scaling = 1.0 - else: - mean_function = construct_mean_function(X_running, D_in, D_out) - X_running = mean_function(X_running) - if tf.is_tensor(X_running): - X_running = cast(tf.Tensor, X_running).numpy() - q_sqrt_scaling = config.inner_layer_qsqrt_factor - - layer = GPLayer( - kernel, - inducing_var, - num_data, - mean_function=mean_function, - name=f"gp_{i_layer}", - ) - layer.q_sqrt.assign(layer.q_sqrt * 
q_sqrt_scaling) - gp_layers.append(layer) - - likelihood = Gaussian(config.likelihood_noise_variance) - return DeepGP(gp_layers, LikelihoodLayer(likelihood)) diff --git a/gpflux/architectures/factory.py b/gpflux/architectures/factory.py new file mode 100644 index 00000000..f492ba27 --- /dev/null +++ b/gpflux/architectures/factory.py @@ -0,0 +1,230 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""This module defines factories for various Deep GP architectures""" +from functools import singledispatch +from typing import Type, cast + +import numpy as np +import tensorflow as tf +from scipy.cluster.vq import kmeans2 + +from gpflow import default_float +from gpflow.kernels import Stationary +from gpflow.mean_functions import Zero + +from gpflux.architectures.config import ( + HyperParametersConfig, + ModelHyperParametersConfig, + OrthogonalModelHyperparametersConfig, +) +from gpflux.helpers import ( + construct_basic_inducing_variables, + construct_basic_kernel, + construct_mean_function, +) +from gpflux.layers import OrthGPLayer +from gpflux.layers.gp_layer import GPLayer +from gpflux.layers.likelihood_layer import LikelihoodLayer +from gpflux.models import DeepGP, OrthDeepGP + + +def build_kernel(input_dim: int, is_last_layer: bool, kernel: Type[Stationary]) -> Stationary: + """ + Return a :class:`gpflow.kernels.Stationary` kernel with ARD lengthscales set to + 1.0 and a small kernel variance of 1e-6 if the kernel 
is part of a hidden layer; + otherwise, the kernel variance is set to 1.0. + + :param input_dim: The input dimensionality of the layer. + :param is_last_layer: Whether the kernel is part of the last layer in the Deep GP. + :param kernel: the :class:`~gpflow.kernels.Stationary` type of the kernel + """ + assert input_dim > 0, "Cannot have non positive input dimension" + + variance = 1e-6 if not is_last_layer else 1.0 + lengthscales = [1.0] * input_dim + + return kernel(lengthscales=lengthscales, variance=variance) + + +@singledispatch +def build_constant_input_dim_architecture( + model_config: HyperParametersConfig, X: np.ndarray +) -> DeepGP: + r""" + Build a Deep GP consisting of a number of :class:`GPLayer`\ s. + All the hidden layers have the same input dimension as the data, that is, ``X.shape[1]``. + + The architecture is largely based on :cite:t:`salimbeni2017doubly`, with + the most notable difference being that we keep the hidden dimension equal + to the input dimensionality of the data. + + .. note:: + This architecture might be slow for high-dimensional data. + + :param model_config: The configuration for (hyper)parameters. + :param X: The training input data, used to retrieve the number of datapoints and + the input dimension and to initialise the inducing point locations using k-means. A + tensor of rank two with the dimensions ``[num_data, input_dim]``. + + :return: an instance of a DeepGP model + :raises ValueError: If the config type is not registered. + """ + raise ValueError( + f"Don't know how to create model from config of type: {type(model_config)}" + ) + + +@build_constant_input_dim_architecture.register +def build_constant_input_dim_deep_gp( + model_config: ModelHyperParametersConfig, X: np.ndarray +) -> DeepGP: + if X.dtype != default_float(): + raise ValueError( + f"X needs to have dtype according to gpflow.default_float() = {default_float()} " + f"however got X with {X.dtype} dtype." 
@build_constant_input_dim_architecture.register
def build_constant_input_dim_orthogonal_deep_gp(
    model_config: OrthogonalModelHyperparametersConfig, X: np.ndarray
) -> OrthDeepGP:
    """
    Build an :class:`OrthDeepGP` out of ``model_config.num_layers``
    :class:`OrthGPLayer`\\ s, registered as the builder for
    :class:`OrthogonalModelHyperparametersConfig`.

    Every layer takes ``X.shape[1]`` inputs; hidden layers also produce ``X.shape[1]``
    outputs, while the last layer produces a single output.

    :param model_config: The configuration; this function reads ``num_inducing_u``,
        ``num_inducing_v``, ``num_layers``, ``kernel``, ``whiten``,
        ``inner_layer_qsqrt_factor`` and ``likelihood`` from it.
    :param X: The training inputs, unpacked as ``[num_data, input_dim]``; used for the
        data count, the layer dimensions and the k-means initialisation of the
        inducing points.
    :return: an :class:`OrthDeepGP` wrapping the layers and a :class:`LikelihoodLayer`.
    :raises ValueError: If ``X.dtype`` differs from ``gpflow.default_float()``.
    :raises AssertionError: If ``model_config.whiten`` is not ``True``.
    """
    if X.dtype != default_float():
        raise ValueError(
            f"X needs to have dtype according to gpflow.default_float() = {default_float()} "
            f"however got X with {X.dtype} dtype."
        )

    num_data, input_dim = X.shape
    # Input fed to the next hidden layer's mean-function construction; updated per layer.
    X_running = X

    num_inducing_u = model_config.num_inducing_u
    num_inducing_v = model_config.num_inducing_v
    gp_layers = []
    # One k-means run provides the initial locations for both inducing sets; the number
    # of clusters is capped at the number of data points.
    centroids, _ = kmeans2(X, k=min(num_inducing_u + num_inducing_v, X.shape[0]), minit="points")

    # The first ``num_inducing_u`` centroids initialise the u set, the remainder the v set.
    centroids_u = centroids[:num_inducing_u, ...]
    centroids_v = centroids[num_inducing_u:, ...]

    num_layers = model_config.num_layers
    for i_layer in range(num_layers):
        is_last_layer = i_layer == num_layers - 1
        D_in = input_dim
        D_out = 1 if is_last_layer else input_dim

        # Pass in kernels, specify output dim (shared hyperparams/variables)

        inducing_var_u = construct_basic_inducing_variables(
            num_inducing=num_inducing_u,
            input_dim=D_in,
            share_variables=True,
            z_init=centroids_u,
        )

        inducing_var_v = construct_basic_inducing_variables(
            num_inducing=num_inducing_v,
            input_dim=D_in,
            share_variables=True,
            z_init=centroids_v,
        )

        kernel = construct_basic_kernel(
            kernels=build_kernel(D_in, is_last_layer, model_config.kernel),
            output_dim=D_out,
            share_hyperparams=True,
        )

        assert model_config.whiten is True, "non-whitened case not implemented yet"

        if is_last_layer:
            # The last layer uses a zero mean and leaves q_sqrt unscaled.
            mean_function = Zero()
            q_sqrt_scaling = 1.0
        else:
            mean_function = construct_mean_function(X_running, D_out)
            # Push the running inputs through this layer's mean function so the next
            # hidden layer's mean function is constructed from them.
            X_running = mean_function(X_running)
            if tf.is_tensor(X_running):
                # Convert back to a NumPy array for the next iteration.
                X_running = cast(tf.Tensor, X_running).numpy()
            q_sqrt_scaling = model_config.inner_layer_qsqrt_factor

        layer = OrthGPLayer(
            kernel,
            inducing_var_u,
            inducing_var_v,
            num_data,
            mean_function=mean_function,
            name=f"orth_gp_{i_layer}",
            num_latent_gps=D_out,
        )
        # Rescale the initial q_sqrt of both inducing sets by the layer-dependent factor.
        layer.q_sqrt_u.assign(layer.q_sqrt_u * q_sqrt_scaling)
        layer.q_sqrt_v.assign(layer.q_sqrt_v * q_sqrt_scaling)
        gp_layers.append(layer)

    likelihood = model_config.likelihood.create()

    return OrthDeepGP(gp_layers, LikelihoodLayer(likelihood))
index 00000000..39743565 --- /dev/null +++ b/gpflux/conditionals/__init__.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from . import conditionals, multioutput +from .dispatch import conditional + +# from .util import base_conditional + + +__all__ = [ + "conditional", + "conditionals", + "dispatch", + "multioutput", +] diff --git a/gpflux/conditionals/conditionals.py b/gpflux/conditionals/conditionals.py new file mode 100644 index 00000000..ec89fef1 --- /dev/null +++ b/gpflux/conditionals/conditionals.py @@ -0,0 +1,89 @@ +# Copyright 2017-2020 The GPflow Contributors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@conditional._gpflow_internal_register(
    object, InducingVariables, InducingVariables, Kernel, object, object
)
def _sparse_orthogonal_conditional(
    Xnew: tf.Tensor,
    inducing_variable_u: InducingVariables,
    inducing_variable_v: InducingVariables,
    kernel: Kernel,
    f_u: tf.Tensor,
    f_v: tf.Tensor,
    *,
    full_cov: bool = False,
    full_output_cov: bool = False,
    q_sqrt_u: Optional[tf.Tensor] = None,
    q_sqrt_v: Optional[tf.Tensor] = None,
    white: bool = False
) -> MeanAndVariance:
    """
    Orthogonal sparse GP conditional for a kernel with two sets of inducing variables.

    The posterior class matching ``(kernel, inducing_variable_u, inducing_variable_v)``
    is looked up with ``get_posterior_class``, instantiated without a mean function,
    and asked for a fused prediction at ``Xnew``.

    :param Xnew: inputs at which to predict (presumably a data matrix of size [N, D]).
    :param inducing_variable_u: the first set of inducing variables.
    :param inducing_variable_v: the second (orthogonal) set of inducing variables.
    :param kernel: the kernel shared by both sets.
    :param f_u: variational mean associated with ``inducing_variable_u``.
    :param f_v: variational mean associated with ``inducing_variable_v``.
    :param full_cov: forwarded to ``fused_predict_f``; return the covariance between
        the datapoints.
    :param full_output_cov: forwarded to ``fused_predict_f``; return the covariance
        between the outputs.
    :param q_sqrt_u: optional square-root representation of the covariance for
        ``f_u`` (standard deviations or Cholesky factors).
    :param q_sqrt_v: optional square-root representation of the covariance for ``f_v``.
    :param white: passed to the posterior as ``whiten``; whether the whitened
        representation is used.
    :return: the mean and variance returned by ``fused_predict_f``.
    """
    build_posterior = get_posterior_class(kernel, inducing_variable_u, inducing_variable_v)

    return build_posterior(
        kernel,
        inducing_variable_u,
        inducing_variable_v,
        f_u,
        f_v,
        q_sqrt_u,
        q_sqrt_v,
        whiten=white,
        mean_function=None,
    ).fused_predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from . import conditionals + +__all__ = ["conditionals"] diff --git a/gpflux/conditionals/multioutput/conditionals.py b/gpflux/conditionals/multioutput/conditionals.py new file mode 100644 index 00000000..dab8f437 --- /dev/null +++ b/gpflux/conditionals/multioutput/conditionals.py @@ -0,0 +1,92 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from typing import Optional + +import tensorflow as tf + +from gpflow.base import MeanAndVariance +from gpflow.inducing_variables import SharedIndependentInducingVariables +from gpflow.kernels import SharedIndependent + +from gpflux.conditionals.dispatch import conditional +from gpflux.posteriors import get_posterior_class + + +@conditional._gpflow_internal_register( + object, + SharedIndependentInducingVariables, + SharedIndependentInducingVariables, + SharedIndependent, + object, + object, +) +def shared_independent_orthogonal_conditional( + Xnew: tf.Tensor, + inducing_variable_u: SharedIndependentInducingVariables, + inducing_variable_v: SharedIndependentInducingVariables, + kernel: SharedIndependent, + f_u: tf.Tensor, + f_v: tf.Tensor, + *, + full_cov: bool = False, + full_output_cov: bool = False, + q_sqrt_u: Optional[tf.Tensor] = None, + q_sqrt_v: Optional[tf.Tensor] = None, + white: bool = False +) -> MeanAndVariance: + """Multioutput conditional for an independent kernel and shared inducing inducing. + Same behaviour as conditional with non-multioutput kernels. + The covariance matrices used to calculate the conditional have the following shape: + - Kuu: [M, M] + - Kuf: [M, N] + - Kff: N or [N, N] + + Further reference + ----------------- + - See `gpflow.conditionals._conditional` for a detailed explanation of + conditional in the single-output case. + - See the multioutput notebook for more information about the multioutput framework. + Parameters + ---------- + :param Xnew: data matrix, size [N, D]. + :param f: data matrix, [M, P] + :param full_cov: return the covariance between the datapoints + :param full_output_cov: return the covariance between the outputs. + Note: as we are using a independent kernel these covariances will be zero. + :param q_sqrt: matrix of standard-deviations or Cholesky matrices, + size [M, P] or [P, M, M]. 
def base_orthogonal_conditional(
    Kmn: tf.Tensor,
    Kmm: tf.Tensor,
    Knn: tf.Tensor,
    Cmn: tf.Tensor,
    Cmm: tf.Tensor,
    Cnn: tf.Tensor,
    f_u: tf.Tensor,
    f_v: tf.Tensor,
    *,
    full_cov: bool = False,
    q_sqrt_u: Optional[tf.Tensor] = None,
    q_sqrt_v: Optional[tf.Tensor] = None,
    white: bool = False,
    Lm: Optional[tf.Tensor] = None,
) -> MeanAndVariance:
    r"""
    Compute the mean and (co)variance of an orthogonal sparse GP conditional.

    This is a thin wrapper around :func:`base_orthogonal_conditional_with_lm`: it
    supplies the Cholesky factors of ``Kmm`` and ``Cmm`` and forwards everything else.
    The result is the sum of two contributions:

    - the "u" part, built from ``Kmn``, ``Lm``, ``f_u`` and ``q_sqrt_u``, which
      contributes a mean and only the variance induced by ``q_sqrt_u``;
    - the "v" part, built from ``Cmn``, ``chol(Cmm)``, ``Cnn``, ``f_v`` and
      ``q_sqrt_v``, which contributes a mean, the conditional prior variance and the
      variance induced by ``q_sqrt_v``.

    Shapes below are those enforced by the checks in
    :func:`base_orthogonal_conditional_with_lm`.

    :param Kmn: [M, ..., N]
    :param Kmm: [M, M]; only used to compute ``Lm`` when ``Lm`` is not given.
    :param Knn: [..., N, N] if ``full_cov`` else [..., N]
    :param Cmn: [V, ..., N]
    :param Cmm: [V, V]; its Cholesky factor is computed here.
    :param Cnn: [..., N, N] if ``full_cov`` else [..., N]
    :param f_u: [M, R]
    :param f_v: [V, R]
    :param full_cov: whether to return the full covariance between the N points.
    :param q_sqrt_u: if given, [R, M, M] (lower triangular) or [M, R] (diagonal)
    :param q_sqrt_v: if given, [R, V, V] (lower triangular) or [V, R] (diagonal)
    :param white: whether the whitened representation is used.
    :param Lm: optional precomputed Cholesky factor of ``Kmm``, [M, M]. When omitted
        it is computed from ``Kmm``.
    :return: mean [..., N, R] and variance [..., R, N, N] if ``full_cov`` else
        [..., N, R]
    """
    # Callers that already hold the factor pass it in to avoid a second decomposition;
    # without it, forwarding ``None`` would fail inside the ``_with_lm`` implementation.
    if Lm is None:
        Lm = tf.linalg.cholesky(Kmm)

    return base_orthogonal_conditional_with_lm(
        Kmn=Kmn,
        Lm=Lm,
        Knn=Knn,
        L_Cmm=tf.linalg.cholesky(Cmm),
        Cmn=Cmn,
        Cnn=Cnn,
        f_u=f_u,
        f_v=f_v,
        full_cov=full_cov,
        q_sqrt_u=q_sqrt_u,
        q_sqrt_v=q_sqrt_v,
        white=white,
    )
+ tf.reshape(0, [1]), # [M] + tf.reshape(K - 1, [1]), + ], + 0, + ) # [N] + Kmn = tf.transpose(Kmn, perm) # [..., M, N] + Cmn = tf.transpose(Cmn, perm) # [..., M, N] + + shape_constraints = [ + (Kmn, [..., "M", "N"]), + (Lm, ["M", "M"]), + (Knn, [..., "N", "N"] if full_cov else [..., "N"]), + (f_u, ["M", "R"]), + (Cmn, [..., "V", "N"]), + (L_Cmm, ["V", "V"]), + (Cnn, [..., "N", "N"] if full_cov else [..., "N"]), + (f_v, ["V", "R"]), + ] + if q_sqrt_u is not None: + shape_constraints.append( + (q_sqrt_u, (["M", "R"] if q_sqrt_u.shape.ndims == 2 else ["R", "M", "M"])) + ) + + if q_sqrt_v is not None: + shape_constraints.append( + (q_sqrt_v, (["V", "R"] if q_sqrt_v.shape.ndims == 2 else ["R", "V", "V"])) + ) + + tf.debugging.assert_shapes( + shape_constraints, + message="base_orthogonal_conditional() arguments " + "[Note that this check verifies the shape of an alternative " + "representation of Kmn. See the docs for the actual expected " + "shape.]", + ) + + leading_dims = tf.shape(Kmn)[:-2] + + ################################################################################### + + fmean_u, fvar_u = conditional_GP_maths( + leading_dims=leading_dims, + Lm=Lm, + Kmn=Kmn, + Knn=Knn, + num_func=num_func, + M=M, + N=N, + f=f_u, + q_sqrt=q_sqrt_u, + white=white, + full_cov=full_cov, + just_parametric=True, + ) + + fmean_v, fvar_v = conditional_GP_maths( + leading_dims=leading_dims, + Lm=L_Cmm, + Kmn=Cmn, + Knn=Cnn, + num_func=num_func, + M=V, + N=N, + f=f_v, + q_sqrt=q_sqrt_v, + white=white, + full_cov=full_cov, + just_parametric=False, + ) + + ################################################################################### + + shape_constraints = [ + (Kmn, [..., "M", "N"]), # tensor included again for N dimension + (f_u, [..., "M", "R"]), # tensor included again for R dimension + (fmean_u, [..., "N", "R"]), + (fvar_u, [..., "R", "N", "N"] if full_cov else [..., "N", "R"]), + (fmean_v, [..., "N", "R"]), + (fvar_v, [..., "R", "N", "N"] if full_cov else [..., "N", "R"]), 
def base_heteroskedastic_orthogonal_conditional(
    Kmn: tf.Tensor,
    Kmm: tf.Tensor,
    Knn: tf.Tensor,
    Cmn: tf.Tensor,
    Cmm: tf.Tensor,
    Cnn: tf.Tensor,
    f_u: tf.Tensor,
    f_v: tf.Tensor,
    *,
    full_cov: bool = False,
    q_sqrt_u: Optional[tf.Tensor] = None,
    q_sqrt_v: Optional[tf.Tensor] = None,
    white: bool = False,
) -> MeanAndVariance:
    r"""
    Heteroskedastic variant of the orthogonal sparse GP conditional.

    Factorises ``Kmm`` and ``Cmm`` with a Cholesky decomposition and delegates to
    :func:`base_heteroskedastic_orthogonal_conditional_with_lm`, which returns the
    "u" and "v" contributions concatenated along the last axis instead of summed.

    :param Kmn: [M, ..., N]
    :param Kmm: [M, M]
    :param Knn: [..., N, N] if ``full_cov`` else [..., N]
    :param Cmn: [V, ..., N]
    :param Cmm: [V, V]
    :param Cnn: [..., N, N] if ``full_cov`` else [..., N]
    :param f_u: [M, R]
    :param f_v: [V, R]
    :param full_cov: whether to return the full covariance between the N points.
    :param q_sqrt_u: if given, [R, M, M] (lower triangular) or [M, R] (diagonal)
    :param q_sqrt_v: if given, [R, V, V] (lower triangular) or [V, R] (diagonal)
    :param white: whether the whitened representation is used.
    :return: the mean and variance produced by
        :func:`base_heteroskedastic_orthogonal_conditional_with_lm`.
    """
    chol_Kmm = tf.linalg.cholesky(Kmm)
    chol_Cmm = tf.linalg.cholesky(Cmm)

    return base_heteroskedastic_orthogonal_conditional_with_lm(
        Kmn=Kmn,
        Lm=chol_Kmm,
        Knn=Knn,
        L_Cmm=chol_Cmm,
        Cmn=Cmn,
        Cnn=Cnn,
        f_u=f_u,
        f_v=f_v,
        full_cov=full_cov,
        q_sqrt_u=q_sqrt_u,
        q_sqrt_v=q_sqrt_v,
        white=white,
    )
+ + This allows `Lm` to be precomputed, which can improve performance. + """ + # compute kernel stuff + num_func = tf.shape(f_u)[-1] # R + N = tf.shape(Kmn)[-1] + M = tf.shape(f_u)[-2] + + # get the leading dims in Kmn to the front of the tensor + # if Kmn has rank two, i.e. [M, N], this is the identity op. + K = tf.rank(Kmn) + perm = tf.concat( + [ + tf.reshape(tf.range(1, K - 1), [K - 2]), # leading dims (...) + tf.reshape(0, [1]), # [M] + tf.reshape(K - 1, [1]), + ], + 0, + ) # [N] + Kmn = tf.transpose(Kmn, perm) # [..., M, N] + Cmn = tf.transpose(Cmn, perm) # [..., M, N] + + shape_constraints = [ + (Kmn, [..., "M", "N"]), + (Lm, ["M", "M"]), + (Knn, [..., "N", "N"] if full_cov else [..., "N"]), + (f_u, ["M", "R"]), + (Cmn, [..., "V", "N"]), + (L_Cmm, ["V", "V"]), + (Cnn, [..., "N", "N"] if full_cov else [..., "N"]), + (f_v, ["V", "R"]), + ] + if q_sqrt_u is not None: + shape_constraints.append( + (q_sqrt_u, (["M", "R"] if q_sqrt_u.shape.ndims == 2 else ["R", "M", "M"])) + ) + + if q_sqrt_v is not None: + shape_constraints.append( + (q_sqrt_v, (["V", "R"] if q_sqrt_v.shape.ndims == 2 else ["R", "V", "V"])) + ) + + tf.debugging.assert_shapes( + shape_constraints, + message="base_orthogonal_conditional() arguments " + "[Note that this check verifies the shape of an alternative " + "representation of Kmn. 
def conditional_GP_maths(  # noqa: C901
    leading_dims: tf.Tensor,
    Lm: tf.Tensor,
    Kmn: tf.Tensor,
    Knn: tf.Tensor,
    num_func: int,
    M: int,
    N: int,
    f: tf.Tensor,
    q_sqrt: Optional[tf.Tensor],
    just_parametric: bool,
    white: bool = True,
    full_cov: bool = False,
) -> MeanAndVariance:
    """
    Compute one contribution (mean and variance) to a sparse GP conditional.

    With ``A = Lm⁻¹ Kmn`` (and a further back-substitution with ``Lmᵀ`` when
    ``white`` is False) the mean is ``Aᵀ f``. The variance is made of up to two terms:

    - the conditional prior variance ``Knn - Aᵀ A`` (computed with the first ``A``),
      included only when ``just_parametric`` is False;
    - the variational term built from ``q_sqrt`` and ``A``, included only when
      ``q_sqrt`` is given.

    :param leading_dims: the leading (batch) dimensions of ``Kmn``.
    :param Lm: lower-triangular factor, [M, M]; broadcast over ``leading_dims``.
    :param Kmn: [..., M, N]
    :param Knn: [..., N, N] if ``full_cov`` else [..., N]; only used when
        ``just_parametric`` is False.
    :param num_func: number of functions R.
    :param M: number of inducing points.
    :param N: number of prediction points.
    :param f: [M, R]; broadcast over ``leading_dims``.
    :param q_sqrt: ``None``, or [M, R] (diagonal) or [R, M, M] (lower triangular).
    :param just_parametric: if True, leave out the conditional prior variance so that
        only the ``q_sqrt`` term contributes to the returned variance.
    :param white: whether the whitened representation is used.
    :param full_cov: whether to return the full covariance between the N points.
    :return: mean [..., N, R] and variance [..., R, N, N] if ``full_cov`` else
        [..., N, R]
    :raises ValueError: if ``q_sqrt`` has a rank other than 2 or 3.
    """
    # Compute the projection matrix A
    Lm = tf.broadcast_to(Lm, tf.concat([leading_dims, tf.shape(Lm)], 0))  # [..., M, M]
    A = tf.linalg.triangular_solve(Lm, Kmn, lower=True)  # [..., M, N]

    if not just_parametric:
        # compute the covariance due to the conditioning
        if full_cov:
            fvar = Knn - tf.linalg.matmul(A, A, transpose_a=True)  # [..., N, N]
            cov_shape = tf.concat([leading_dims, [num_func, N, N]], 0)
            fvar = tf.broadcast_to(tf.expand_dims(fvar, -3), cov_shape)  # [..., R, N, N]
        else:
            fvar = Knn - tf.reduce_sum(tf.square(A), -2)  # [..., N]
            cov_shape = tf.concat([leading_dims, [num_func, N]], 0)  # [..., R, N]
            fvar = tf.broadcast_to(tf.expand_dims(fvar, -2), cov_shape)  # [..., R, N]

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.linalg.triangular_solve(tf.linalg.adjoint(Lm), A, lower=False)

    # construct the conditional mean
    f_shape = tf.concat([leading_dims, [M, num_func]], 0)  # [..., M, R]
    f = tf.broadcast_to(f, f_shape)  # [..., M, R]
    fmean = tf.linalg.matmul(A, f, transpose_a=True)  # [..., N, R]

    if q_sqrt is not None:
        q_sqrt_dims = q_sqrt.shape.ndims
        if q_sqrt_dims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # [R, M, N]
        elif q_sqrt_dims == 3:
            L = tf.linalg.band_part(q_sqrt, -1, 0)  # force lower triangle # [R, M, M]
            L_shape = tf.shape(L)
            L = tf.broadcast_to(L, tf.concat([leading_dims, L_shape], 0))

            shape = tf.concat([leading_dims, [num_func, M, N]], axis=0)
            A_tiled = tf.broadcast_to(tf.expand_dims(A, -3), shape)
            LTA = tf.linalg.matmul(L, A_tiled, transpose_a=True)  # [R, M, N]
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" % str(q_sqrt.shape.ndims))

        if full_cov:
            q_term = tf.linalg.matmul(LTA, LTA, transpose_a=True)  # [R, N, N]
        else:
            q_term = tf.reduce_sum(tf.square(LTA), -2)  # [R, N]

        # In the parametric-only case the q_sqrt term is the whole variance.
        fvar = q_term if just_parametric else fvar + q_term
    elif just_parametric:
        # Neither variance term applies: the q_sqrt term is the only source of variance
        # in the parametric-only case, so without q_sqrt the contribution is zero.
        # Previously ``fvar`` was left unassigned here and the function raised.
        cov_dims = [num_func, N, N] if full_cov else [num_func, N]
        fvar = tf.zeros(tf.concat([leading_dims, cov_dims], 0), dtype=fmean.dtype)

    if not full_cov:
        fvar = tf.linalg.adjoint(fvar)  # [N, R]

    return fmean, fvar
Cmns: tf.Tensor, + Cmms: tf.Tensor, + Cnns: tf.Tensor, + f_u: tf.Tensor, + f_v: tf.Tensor, + *, + full_cov: bool = False, + q_sqrt_u: Optional[tf.Tensor] = None, + q_sqrt_v: Optional[tf.Tensor] = None, + white: bool = False, + Lms: Optional[tf.Tensor] = None, +) -> MeanAndVariance: + """ + Multi-output GP with independent GP priors. + + Number of latent processes equals the number of outputs (L = P). + + Further reference: + + - See `gpflow.conditionals._conditional` for a detailed explanation of + conditional in the single-output case. + - See the multioutput notebook for more information about the multioutput framework. + - See above for the parameters and the return value. + """ + fs_u = tf.transpose(f_u)[:, :, None] # [P, M_u, 1] + # [P, 1, M_u, M_u] or [P, M_u, 1] + + fs_v = tf.transpose(f_v)[:, :, None] # [P, M_v, 1] + # [P, 1, M_v, M_v] or [P, M_v, 1] + + if q_sqrt_u is not None and q_sqrt_v is not None: + q_sqrts_u = ( + tf.transpose(q_sqrt_u)[:, :, None] + if q_sqrt_u.shape.ndims == 2 + else q_sqrt_u[:, None, :, :] + ) + q_sqrts_v = ( + tf.transpose(q_sqrt_v)[:, :, None] + if q_sqrt_v.shape.ndims == 2 + else q_sqrt_v[:, None, :, :] + ) + + base_conditional_args_to_map: Tuple[tf.Tensor, ...] = ( + Kmms, + Kmns, + Knns, + Cmms, + Cmns, + Cnns, + fs_u, + fs_v, + q_sqrts_u, + q_sqrts_v, + Lms, + ) + + def single_orthogonal_gp_conditional( + t: Tuple[tf.Tensor, ...] + ) -> MeanAndVariance: # pragma: no cover - tf.map_fn is invisible to codecov + Kmm, Kmn, Knn, Cmm, Cmn, Cnn, f_u, f_v, q_sqrt_u, q_sqrt_v, Lm = t + + return base_orthogonal_conditional( + Kmn, + Kmm, + Knn, + Cmn, + Cmm, + Cnn, + f_u, + f_v, + full_cov=full_cov, + q_sqrt_u=q_sqrt_u, + q_sqrt_v=q_sqrt_v, + white=white, + Lm=Lm, + ) + + else: + base_conditional_args_to_map = (Kmms, Kmns, Knns, Cmms, Cmns, Cnns, fs_u, fs_v, Lms) + + def single_orthogonal_gp_conditional( + t: Tuple[tf.Tensor, ...] 
+ ) -> MeanAndVariance: # pragma: no cover - tf.map_fn is invisible to codecov + Kmm, Kmn, Knn, Cmm, Cmn, Cnn, f_u, f_v, Lm = t + return base_orthogonal_conditional( + Kmn, + Kmm, + Knn, + Cmn, + Cmm, + Cnn, + f_u, + f_v, + full_cov=full_cov, + q_sqrt_u=q_sqrt_u, + q_sqrt_v=q_sqrt_v, + white=white, + Lm=Lm, + ) + + rmu, rvar = tf.map_fn( + single_orthogonal_gp_conditional, + base_conditional_args_to_map, + (default_float(), default_float()), + ) # [P, N, 1], [P, 1, N, N] or [P, N, 1] + + fmu = rollaxis_left(tf.squeeze(rmu, axis=-1), 1) # [N, P] + + if full_cov: + fvar = tf.squeeze(rvar, axis=-3) # [..., 0, :, :] # [P, N, N] + else: + fvar = rollaxis_left(tf.squeeze(rvar, axis=-1), 1) # [N, P] + + return fmu, fvar diff --git a/gpflux/covariances/__init__.py b/gpflux/covariances/__init__.py new file mode 100644 index 00000000..5fbe730d --- /dev/null +++ b/gpflux/covariances/__init__.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# flake8: noqa +from . 
@Cvf.register(InducingPoints, InducingPoints, Kernel, TensorLike)
def Cvf_kernel_inducingpoints(
    inducing_variable_u: InducingPoints,
    inducing_variable_v: InducingPoints,
    kernel: Kernel,
    Xnew: TensorType,
    *,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    Cross-covariance between the ``v`` inducing points and ``Xnew`` with the
    contribution of the ``u`` inducing points projected out.

    The result is ``Kvf - (L⁻¹ Kuv)ᵀ (L⁻¹ Kuf)``, where ``L`` is the lower
    Cholesky factor of ``Kuu`` (plus default jitter on the diagonal).

    :param inducing_variable_u: Inducing points of the first set.
    :param inducing_variable_v: Inducing points of the second set.
    :param kernel: Kernel used for every covariance evaluation.
    :param Xnew: Inputs at which the cross-covariance is evaluated.
    :param L_Kuu: Optional precomputed Cholesky factor of ``Kuu``. When it is
        ``None`` the factor is computed here from ``kernel`` and
        ``inducing_variable_u``.
    :return: The projected cross-covariance between ``v`` and ``Xnew``.
    """
    Z_u = inducing_variable_u.Z
    Z_v = inducing_variable_v.Z

    if L_Kuu is None:
        # No factor supplied: build a jittered Kuu and factorise it.
        Kuu = kernel(Z_u)
        Kuu = Kuu + tf.eye(inducing_variable_u.num_inducing, dtype=Kuu.dtype) * default_jitter()
        L_Kuu = tf.linalg.cholesky(Kuu)

    # Whiten both cross-covariances with respect to u.
    whitened_uv = tf.linalg.triangular_solve(L_Kuu, kernel(Z_u, Z_v))
    whitened_uf = tf.linalg.triangular_solve(L_Kuu, kernel(Z_u, Xnew))

    projection = tf.linalg.matmul(whitened_uv, whitened_uf, transpose_a=True)
    return kernel(Z_v, Xnew) - projection
@Cvv.register(InducingPoints, InducingPoints, Kernel)
def Cvv_kernel_inducingpoints(
    inducing_variable_u: InducingPoints,
    inducing_variable_v: InducingPoints,
    kernel: Kernel,
    *,
    jitter: float = 0.0,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    Covariance of the ``v`` inducing points with the contribution of the ``u``
    inducing points projected out.

    The result is ``Kvv - (L⁻¹ Kuv)ᵀ (L⁻¹ Kuv) + jitter * I``, where ``L`` is
    the lower Cholesky factor of ``Kuu`` (plus default jitter on the diagonal).

    :param inducing_variable_u: Inducing points of the first set.
    :param inducing_variable_v: Inducing points of the second set.
    :param kernel: Kernel used for every covariance evaluation.
    :param jitter: Value added to the diagonal of the returned matrix.
    :param L_Kuu: Optional precomputed Cholesky factor of ``Kuu``. When it is
        ``None`` the factor is computed here from ``kernel`` and
        ``inducing_variable_u``.
    :return: The projected covariance of the ``v`` inducing points.
    """
    Z_u = inducing_variable_u.Z
    Z_v = inducing_variable_v.Z

    if L_Kuu is None:
        # No factor supplied: build a jittered Kuu and factorise it.
        Kuu = kernel(Z_u)
        Kuu = Kuu + tf.eye(inducing_variable_u.num_inducing, dtype=Kuu.dtype) * default_jitter()
        L_Kuu = tf.linalg.cholesky(Kuu)

    whitened_uv = tf.linalg.triangular_solve(L_Kuu, kernel(Z_u, Z_v))
    projected = kernel(Z_v) - tf.linalg.matmul(whitened_uv, whitened_uv, transpose_a=True)

    diagonal_jitter = jitter * tf.eye(inducing_variable_v.num_inducing, dtype=projected.dtype)
    return projected + diagonal_jitter
The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from . import cvfs, cvvs + +__all__ = ["cvfs", "cvvs"] diff --git a/gpflux/covariances/multioutput/cvfs.py b/gpflux/covariances/multioutput/cvfs.py new file mode 100644 index 00000000..5ba597f0 --- /dev/null +++ b/gpflux/covariances/multioutput/cvfs.py @@ -0,0 +1,211 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _unstack_optional_cholesky(L_Kuu: Optional[tf.Tensor], num_outputs: int) -> list:
    """
    Split a stacked Cholesky factor into one factor per output.

    When ``L_Kuu`` is ``None`` a list of ``num_outputs`` ``None`` entries is
    returned, so every per-output ``Cvf`` call computes its own factor instead
    of failing inside ``tf.unstack``.
    """
    if L_Kuu is None:
        return [None] * num_outputs
    return tf.unstack(L_Kuu, axis=0)


@Cvf.register(InducingPoints, InducingPoints, MultioutputKernel, object)
@check_shapes(
    "inducing_variable_u: [M_u, D, 1]",
    "inducing_variable_v: [M_v, D, 1]",
    "Xnew: [batch..., N, D]",
    "return: [M_v, P, batch..., N, P]",
)
def Cvf_generic(
    inducing_variable_u: InducingPoints,
    inducing_variable_v: InducingPoints,
    kernel: MultioutputKernel,
    Xnew: TensorType,
    *,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    Generic ``Cvf`` entry for plain inducing points with a multioutput kernel.

    :param inducing_variable_u: Inducing points of the first set.
    :param inducing_variable_v: Inducing points of the second set.
    :param kernel: The multioutput kernel.
    :param Xnew: Inputs at which the cross-covariance is evaluated.
    :param L_Kuu: Optional precomputed Cholesky factor, forwarded unchanged.
    :return: Whatever the ``Cvf`` dispatcher returns for these arguments.
    """
    # NOTE(review): the arguments are forwarded to the `Cvf` dispatcher
    # unchanged, so the dispatcher may resolve back to this very registration.
    # The declared return shape also disagrees with the `[M, N]` remark below.
    # Confirm which implementation is intended to handle this call.
    return Cvf(
        inducing_variable_u,
        inducing_variable_v,
        kernel,
        Xnew,
        L_Kuu=L_Kuu,
    )  # [M, N]


@Cvf.register(
    SharedIndependentInducingVariables,
    SharedIndependentInducingVariables,
    SharedIndependent,
    object,
)
@check_shapes(
    "inducing_variable_u: [M_u, D, P]",
    "inducing_variable_v: [M_v, D, P]",
    "Xnew: [batch..., N, D]",
    "return: [M_v, batch..., N]",
)
def Cvf_shared_shared(
    inducing_variable_u: SharedIndependentInducingVariables,
    inducing_variable_v: SharedIndependentInducingVariables,
    kernel: SharedIndependent,
    Xnew: tf.Tensor,
    *,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvf`` for shared inducing variables and a shared kernel.

    Delegates to the single-output implementation using the wrapped inducing
    variables and the wrapped kernel.

    :param inducing_variable_u: Shared inducing variables of the first set.
    :param inducing_variable_v: Shared inducing variables of the second set.
    :param kernel: Shared multioutput kernel; its inner ``kernel`` is used.
    :param Xnew: Inputs at which the cross-covariance is evaluated.
    :param L_Kuu: Optional precomputed Cholesky factor of ``Kuu``.
    :return: The cross-covariance returned by the single-output ``Cvf``.
    """
    # The factor is passed through as-is: unstacking it would fail for the
    # default `None` and would hand a Python list to the triangular solves.
    return Cvf(
        inducing_variable_u.inducing_variable,
        inducing_variable_v.inducing_variable,
        kernel.kernel,
        Xnew,
        L_Kuu=L_Kuu,
    )  # [M_v, N]


@Cvf.register(
    SeparateIndependentInducingVariables,
    SeparateIndependentInducingVariables,
    SharedIndependent,
    object,
)
@check_shapes(
    "inducing_variable_u: [M_u, D, P]",
    "inducing_variable_v: [M_v, D, P]",
    "Xnew: [batch..., N, D]",
    "return: [L, M_v, batch..., N]",
)
def Cvf_separate_shared(
    inducing_variable_u: SeparateIndependentInducingVariables,
    inducing_variable_v: SeparateIndependentInducingVariables,
    kernel: SharedIndependent,
    Xnew: TensorType,
    *,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvf`` for separate inducing variables and a shared kernel.

    Evaluates the single-output ``Cvf`` once per pair of inducing variables and
    stacks the results along a new leading axis.

    :param inducing_variable_u: Separate inducing variables of the first set.
    :param inducing_variable_v: Separate inducing variables of the second set.
    :param kernel: Shared multioutput kernel; its inner ``kernel`` is used.
    :param Xnew: Inputs at which the cross-covariance is evaluated.
    :param L_Kuu: Optional stacked Cholesky factors, one per entry along axis 0.
        When ``None`` each per-output call computes its own factor.
    :return: The per-output cross-covariances stacked along axis 0.
    """
    ind_vars_u = inducing_variable_u.inducing_variable_list
    ind_vars_v = inducing_variable_v.inducing_variable_list
    factors = _unstack_optional_cholesky(L_Kuu, len(ind_vars_u))

    return tf.stack(
        [
            Cvf(ind_var_u, ind_var_v, kernel.kernel, Xnew, L_Kuu=l_kuu)
            for ind_var_u, ind_var_v, l_kuu in zip(ind_vars_u, ind_vars_v, factors)
        ],
        axis=0,
    )


@Cvf.register(
    SharedIndependentInducingVariables,
    SharedIndependentInducingVariables,
    SeparateIndependent,
    object,
)
@check_shapes(
    "inducing_variable_u: [M_u, D, P]",
    "inducing_variable_v: [M_v, D, P]",
    "Xnew: [batch..., N, D]",
    "return: [L, M_v, batch..., N]",
)
def Cvf_shared_separate(
    inducing_variable_u: SharedIndependentInducingVariables,
    inducing_variable_v: SharedIndependentInducingVariables,
    kernel: SeparateIndependent,
    Xnew: TensorType,
    *,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvf`` for shared inducing variables and separate kernels.

    Evaluates the single-output ``Cvf`` once per kernel in ``kernel.kernels``
    and stacks the results along a new leading axis.

    :param inducing_variable_u: Shared inducing variables of the first set.
    :param inducing_variable_v: Shared inducing variables of the second set.
    :param kernel: Multioutput kernel holding one kernel per output.
    :param Xnew: Inputs at which the cross-covariance is evaluated.
    :param L_Kuu: Optional stacked Cholesky factors, one per entry along axis 0.
        When ``None`` each per-output call computes its own factor.
    :return: The per-output cross-covariances stacked along axis 0.
    """
    factors = _unstack_optional_cholesky(L_Kuu, len(kernel.kernels))

    return tf.stack(
        [
            Cvf(
                inducing_variable_u.inducing_variable,
                inducing_variable_v.inducing_variable,
                k,
                Xnew,
                L_Kuu=l_kuu,
            )
            for k, l_kuu in zip(kernel.kernels, factors)
        ],
        axis=0,
    )


@Cvf.register(
    SeparateIndependentInducingVariables,
    SeparateIndependentInducingVariables,
    SeparateIndependent,
    object,
)
@check_shapes(
    "inducing_variable_u: [M_u, D, P]",
    "inducing_variable_v: [M_v, D, P]",
    "Xnew: [batch..., N, D]",
    "return: [L, M_v, batch..., N]",
)
def Cvf_separate_separate(
    inducing_variable_u: SeparateIndependentInducingVariables,
    inducing_variable_v: SeparateIndependentInducingVariables,
    kernel: SeparateIndependent,
    Xnew: TensorType,
    *,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvf`` for separate inducing variables and separate kernels.

    Pairs the i-th kernel with the i-th inducing variable of each set,
    evaluates the single-output ``Cvf`` for every triple and stacks the results
    along a new leading axis.

    :param inducing_variable_u: Separate inducing variables of the first set.
    :param inducing_variable_v: Separate inducing variables of the second set.
    :param kernel: Multioutput kernel holding one kernel per output.
    :param Xnew: Inputs at which the cross-covariance is evaluated.
    :param L_Kuu: Optional stacked Cholesky factors, one per entry along axis 0.
        When ``None`` each per-output call computes its own factor.
    :return: The per-output cross-covariances stacked along axis 0.
    :raises AssertionError: If either inducing-variable list differs in length
        from the kernel list.
    """
    n_iv_u = len(inducing_variable_u.inducing_variable_list)
    n_iv_v = len(inducing_variable_v.inducing_variable_list)
    n_k = len(kernel.kernels)
    assert (
        n_iv_u == n_k
    ), f"Must have same number of inducing variables and kernels. Found {n_iv_u} and {n_k}."

    assert (
        n_iv_v == n_k
    ), f"Must have same number of inducing variables and kernels. Found {n_iv_v} and {n_k}."

    factors = _unstack_optional_cholesky(L_Kuu, n_k)

    return tf.stack(
        [
            Cvf(ind_var_u, ind_var_v, k, Xnew, L_Kuu=l_kuu)
            for k, ind_var_u, ind_var_v, l_kuu in zip(
                kernel.kernels,
                inducing_variable_u.inducing_variable_list,
                inducing_variable_v.inducing_variable_list,
                factors,
            )
        ],
        axis=0,
    )
def _unstack_optional_cholesky(L_Kuu: Optional[tf.Tensor], num_outputs: int) -> list:
    """
    Split a stacked Cholesky factor into one factor per output.

    When ``L_Kuu`` is ``None`` a list of ``num_outputs`` ``None`` entries is
    returned, so every per-output ``Cvv`` call computes its own factor instead
    of failing inside ``tf.unstack``.
    """
    if L_Kuu is None:
        return [None] * num_outputs
    return tf.unstack(L_Kuu, axis=0)


@Cvv.register(InducingPoints, InducingPoints, MultioutputKernel)
@check_shapes(
    "inducing_variable_u: [M_u, D, 1]",
    "inducing_variable_v: [M_v, D, 1]",
    "return: [M_v, P, M_v, P]",
)
def Kuu_generic(
    inducing_variable_u: InducingPoints,
    inducing_variable_v: InducingPoints,
    kernel: MultioutputKernel,
    *,
    jitter: float = 0.0,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    Generic ``Cvv`` entry for plain inducing points with a multioutput kernel.

    :param inducing_variable_u: Inducing points of the first set.
    :param inducing_variable_v: Inducing points of the second set.
    :param kernel: The multioutput kernel.
    :param jitter: Value added to the diagonal of the result.
    :param L_Kuu: Optional precomputed Cholesky factor, forwarded unchanged.
    :return: The dispatched ``Cvv`` result plus ``jitter`` on the diagonal.
    """
    # NOTE(review): the arguments are forwarded to the `Cvv` dispatcher
    # unchanged, so this looks like it re-dispatches to this same registration
    # (InducingPoints, InducingPoints, MultioutputKernel). Confirm, and replace
    # with a direct kernel evaluation if so.
    _Cvv = Cvv(
        inducing_variable_u,
        inducing_variable_v,
        kernel,
        L_Kuu=L_Kuu,
    )  # [M, M]
    jittermat = tf.eye(inducing_variable_v.num_inducing, dtype=_Cvv.dtype) * jitter
    return _Cvv + jittermat


@Cvv.register(
    FallbackSharedIndependentInducingVariables,
    FallbackSharedIndependentInducingVariables,
    SharedIndependent,
)
def Cvv_shared_shared(
    inducing_variable_u: FallbackSharedIndependentInducingVariables,
    inducing_variable_v: FallbackSharedIndependentInducingVariables,
    kernel: SharedIndependent,
    *,
    jitter: float = 0.0,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvv`` for shared inducing variables and a shared kernel.

    Delegates to the single-output implementation using the wrapped inducing
    variables and the wrapped kernel, then adds ``jitter`` to the diagonal.

    :param inducing_variable_u: Shared inducing variables of the first set.
    :param inducing_variable_v: Shared inducing variables of the second set.
    :param kernel: Shared multioutput kernel; its inner ``kernel`` is used.
    :param jitter: Value added to the diagonal of the result.
    :param L_Kuu: Optional precomputed Cholesky factor of ``Kuu``.
    :return: The single-output ``Cvv`` result plus ``jitter`` on the diagonal.
    """
    _Cvv = Cvv(
        inducing_variable_u.inducing_variable,
        inducing_variable_v.inducing_variable,
        kernel.kernel,
        L_Kuu=L_Kuu,
    )  # [M, M]
    jittermat = tf.eye(inducing_variable_v.num_inducing, dtype=_Cvv.dtype) * jitter

    return _Cvv + jittermat


# The registration lists one type per positional argument (u, v, kernel); a
# two-type signature could never match a three-argument call.
@Cvv.register(
    FallbackSharedIndependentInducingVariables,
    FallbackSharedIndependentInducingVariables,
    SeparateIndependent,
)
@check_shapes(
    "inducing_variable_u: [M_u, D, P]",
    "inducing_variable_v: [M_v, D, P]",
    "return: [L, M_v, M_v]",
)
def Cvv_fallback_shared(
    inducing_variable_u: FallbackSharedIndependentInducingVariables,
    inducing_variable_v: FallbackSharedIndependentInducingVariables,
    kernel: Union[SeparateIndependent, IndependentLatent],
    *,
    jitter: float = 0.0,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvv`` for shared inducing variables and separate kernels.

    Evaluates the single-output ``Cvv`` once per kernel in ``kernel.kernels``,
    stacks the results along a new leading axis and adds ``jitter`` to each
    diagonal.

    :param inducing_variable_u: Shared inducing variables of the first set.
    :param inducing_variable_v: Shared inducing variables of the second set.
    :param kernel: Multioutput kernel holding one kernel per output.
    :param jitter: Value added to the diagonal of every stacked matrix.
    :param L_Kuu: Optional stacked Cholesky factors, one per entry along axis 0.
        When ``None`` each per-output call computes its own factor.
    :return: The per-output covariances stacked along axis 0.
    """
    factors = _unstack_optional_cholesky(L_Kuu, len(kernel.kernels))

    _Cvv = tf.stack(
        [
            Cvv(
                inducing_variable_u.inducing_variable,
                inducing_variable_v.inducing_variable,
                k,  # the per-output kernel, not a `kernel.kernel` attribute
                L_Kuu=l_kuu,
            )
            for k, l_kuu in zip(kernel.kernels, factors)
        ],
        axis=0,
    )

    # The dtype comes from the computed tensor; the `Cvv` dispatcher has none.
    jittermat = tf.eye(inducing_variable_v.num_inducing, dtype=_Cvv.dtype)[None, :, :] * jitter

    return _Cvv + jittermat


@Cvv.register(
    FallbackSeparateIndependentInducingVariables,
    FallbackSeparateIndependentInducingVariables,
    SharedIndependent,
)
@check_shapes(
    "inducing_variable_u: [M_u, D, P]",
    "inducing_variable_v: [M_v, D, P]",
    "return: [L, M_v, M_v]",
)
def Kuu_fallback_separate_shared(
    inducing_variable_u: FallbackSeparateIndependentInducingVariables,
    inducing_variable_v: FallbackSeparateIndependentInducingVariables,
    kernel: SharedIndependent,
    *,
    jitter: float = 0.0,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvv`` for separate inducing variables and a shared kernel.

    Evaluates the single-output ``Cvv`` once per pair of inducing variables,
    stacks the results along a new leading axis and adds ``jitter`` to each
    diagonal.

    :param inducing_variable_u: Separate inducing variables of the first set.
    :param inducing_variable_v: Separate inducing variables of the second set.
    :param kernel: Shared multioutput kernel; its inner ``kernel`` is used.
    :param jitter: Value added to the diagonal of every stacked matrix.
    :param L_Kuu: Optional stacked Cholesky factors, one per entry along axis 0.
        When ``None`` each per-output call computes its own factor.
    :return: The per-output covariances stacked along axis 0.
    """
    ind_vars_u = inducing_variable_u.inducing_variable_list
    ind_vars_v = inducing_variable_v.inducing_variable_list
    factors = _unstack_optional_cholesky(L_Kuu, len(ind_vars_u))

    _Cvv = tf.stack(
        [
            Cvv(ind_var_u, ind_var_v, kernel.kernel, L_Kuu=l_kuu)
            for ind_var_u, ind_var_v, l_kuu in zip(ind_vars_u, ind_vars_v, factors)
        ],
        axis=0,
    )

    # The dtype comes from the computed tensor; the `Cvv` dispatcher has none.
    jittermat = tf.eye(inducing_variable_v.num_inducing, dtype=_Cvv.dtype)[None, :, :] * jitter

    return _Cvv + jittermat


@Cvv.register(
    FallbackSeparateIndependentInducingVariables,
    FallbackSeparateIndependentInducingVariables,
    SeparateIndependent,
)
@check_shapes(
    "inducing_variable_u: [M_u, D, P]",
    "inducing_variable_v: [M_v, D, P]",
    "return: [L, M_v, M_v]",
)
def Kuu_fallback_separate(
    inducing_variable_u: FallbackSeparateIndependentInducingVariables,
    inducing_variable_v: FallbackSeparateIndependentInducingVariables,
    kernel: SeparateIndependent,
    *,
    jitter: float = 0.0,
    L_Kuu: Optional[tf.Tensor] = None,
) -> tf.Tensor:
    """
    ``Cvv`` for separate inducing variables and separate kernels.

    Pairs the i-th kernel with the i-th inducing variable of each set,
    evaluates the single-output ``Cvv`` for every triple, stacks the results
    along a new leading axis and adds ``jitter`` to each diagonal.

    :param inducing_variable_u: Separate inducing variables of the first set.
    :param inducing_variable_v: Separate inducing variables of the second set.
    :param kernel: Multioutput kernel holding one kernel per output.
    :param jitter: Value added to the diagonal of every stacked matrix.
    :param L_Kuu: Optional stacked Cholesky factors, one per entry along axis 0.
        When ``None`` each per-output call computes its own factor.
    :return: The per-output covariances stacked along axis 0.
    :raises AssertionError: If either inducing-variable list differs in length
        from the kernel list.
    """
    n_iv_u = len(inducing_variable_u.inducing_variable_list)
    n_iv_v = len(inducing_variable_v.inducing_variable_list)
    n_k = len(kernel.kernels)
    assert (
        n_iv_u == n_k
    ), f"Must have same number of inducing variables and kernels. Found {n_iv_u} and {n_k}."

    assert (
        n_iv_v == n_k
    ), f"Must have same number of inducing variables and kernels. Found {n_iv_v} and {n_k}."

    factors = _unstack_optional_cholesky(L_Kuu, n_k)

    _Cvv = tf.stack(
        [
            Cvv(ind_var_u, ind_var_v, k, L_Kuu=l_kuu)
            for ind_var_u, ind_var_v, l_kuu, k in zip(
                inducing_variable_u.inducing_variable_list,
                inducing_variable_v.inducing_variable_list,
                factors,
                kernel.kernels,
            )
        ],
        axis=0,
    )

    jittermat = tf.eye(inducing_variable_v.num_inducing, dtype=_Cvv.dtype)[None, :, :] * jitter

    return _Cvv + jittermat
""" - import inspect import warnings from dataclasses import fields @@ -33,7 +32,8 @@ SeparateIndependentInducingVariables, SharedIndependentInducingVariables, ) -from gpflow.kernels import SeparateIndependent, SharedIndependent +from gpflow.kernels import SeparateIndependent, SharedIndependent, SquaredExponential, Stationary +from gpflow.mean_functions import Identity, Linear, MeanFunction, Zero from gpflow.utilities import deepcopy from gpflux.layers.gp_layer import GPLayer @@ -64,8 +64,10 @@ def construct_basic_kernel( the different outputs, but the kernel can have different hyperparameter values for each. """ if isinstance(kernels, list): - mo_kern = SeparateIndependent(kernels) - elif not share_hyperparams: + return SeparateIndependent(kernels) + + assert output_dim, "With single kernel, you must specify the number of outputs" + if not share_hyperparams: copies = [deepcopy(kernels) for _ in range(output_dim)] mo_kern = SeparateIndependent(copies) else: @@ -125,7 +127,7 @@ def construct_basic_inducing_variables( z_init_is_given = z_init is not None - if isinstance(num_inducing, list): + if isinstance(num_inducing, List): if output_dim is not None: # TODO: the following assert may clash with MixedMultiOutputFeatures # where the number of independent GPs can differ from the output @@ -136,18 +138,25 @@ def construct_basic_inducing_variables( inducing_variables = [] for i, num_ind_var in enumerate(num_inducing): if z_init_is_given: + assert z_init is not None assert len(z_init[i]) == num_ind_var z_init_i = z_init[i] else: - z_init_i = np.random.randn(num_ind_var, input_dim).astype(dtype=default_float()) + z_init_i = np.random.uniform( + low=-0.5, high=0.5, size=(num_ind_var, input_dim) + ).astype(dtype=default_float()) assert z_init_i.shape == (num_ind_var, input_dim) inducing_variables.append(InducingPoints(z_init_i)) + return SeparateIndependentInducingVariables(inducing_variables) - elif not share_variables: + if not share_variables: + assert output_dim, "When 
num_inducing is a number, the number of outputs must be given" + inducing_variables = [] for o in range(output_dim): if z_init_is_given: + assert z_init is not None if z_init.shape != (output_dim, num_inducing, input_dim): raise ValueError( "When not sharing variables, z_init must have shape" @@ -155,28 +164,28 @@ def construct_basic_inducing_variables( ) z_init_o = z_init[o] else: - z_init_o = np.random.randn(num_inducing, input_dim).astype(dtype=default_float()) + z_init_o = np.random.uniform(-0.5, 0.5, (num_inducing, input_dim)).astype( + dtype=default_float() + ) inducing_variables.append(InducingPoints(z_init_o)) + return SeparateIndependentInducingVariables(inducing_variables) - else: - # TODO: should we assert output_dim is None ? + # Share the same inducing variables across the outputs + z_init = ( + z_init + if z_init_is_given + else np.random.uniform(-0.5, 0.5, (num_inducing, input_dim)).astype(dtype=default_float()) + ) + shared_ip = InducingPoints(z_init) - z_init = ( - z_init - if z_init_is_given - else np.random.randn(num_inducing, input_dim).astype(dtype=default_float()) - ) - shared_ip = InducingPoints(z_init) - return SharedIndependentInducingVariables(shared_ip) + return SharedIndependentInducingVariables(shared_ip) -def construct_mean_function( - X: np.ndarray, D_in: int, D_out: int -) -> gpflow.mean_functions.MeanFunction: +def construct_mean_function(X: np.ndarray, D_out: int) -> MeanFunction: """ Return :class:`gpflow.mean_functions.Identity` when ``D_in`` and ``D_out`` are - equal. Otherwise, use the principal components of the inputs matrix ``X`` to build a + equal. Otherwise, use the principal components of the input matrix ``X`` to build a :class:`~gpflow.mean_functions.Linear` mean function. .. note:: @@ -186,14 +195,15 @@ def construct_mean_function( :param X: A data array with the shape ``[N, D_in]`` used to determine the principal components to use to create a :class:`~gpflow.mean_functions.Linear` mean function when ``D_in != D_out``. 
- :param D_in: The dimensionality of the input data (or features) ``X``. - Typically, this corresponds to ``X.shape[-1]``. :param D_out: The dimensionality of the outputs (or targets) ``Y``. Typically, this corresponds to ``Y.shape[-1]`` or the number of latent GPs in the layer. + :return: a GPflow mean function """ + + D_in = X.shape[-1] assert X.shape[-1] == D_in if D_in == D_out: - mean_function = gpflow.mean_functions.Identity() + mean_function = Identity() else: if D_in > D_out: _, _, V = np.linalg.svd(X, full_matrices=False) @@ -202,7 +212,7 @@ def construct_mean_function( W = np.concatenate([np.eye(D_in), np.zeros((D_in, D_out - D_in))], axis=1) assert W.shape == (D_in, D_out) - mean_function = gpflow.mean_functions.Linear(W) + mean_function = Linear(W) gpflow.set_trainable(mean_function, False) return mean_function @@ -213,7 +223,7 @@ def construct_gp_layer( num_inducing: int, input_dim: int, output_dim: int, - kernel_class: Type[gpflow.kernels.Stationary] = gpflow.kernels.SquaredExponential, + kernel_class: Type[Stationary] = SquaredExponential, z_init: Optional[np.ndarray] = None, name: Optional[str] = None, ) -> GPLayer: @@ -252,7 +262,7 @@ def construct_gp_layer( kernel=kernel, inducing_variable=inducing_variable, num_data=num_data, - mean_function=gpflow.mean_functions.Zero(), + mean_function=Zero(), name=name, ) return gp_layer diff --git a/gpflux/layers/__init__.py b/gpflux/layers/__init__.py index 34f7e073..245a05eb 100644 --- a/gpflux/layers/__init__.py +++ b/gpflux/layers/__init__.py @@ -21,4 +21,5 @@ from gpflux.layers.gp_layer import GPLayer from gpflux.layers.latent_variable_layer import LatentVariableLayer, LayerWithObservations from gpflux.layers.likelihood_layer import LikelihoodLayer +from gpflux.layers.orth_gp_layer import OrthGPLayer from gpflux.layers.trackable_layer import TrackableLayer diff --git a/gpflux/layers/gp_layer.py b/gpflux/layers/gp_layer.py index a9324555..c4297664 100644 --- a/gpflux/layers/gp_layer.py +++ 
b/gpflux/layers/gp_layer.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2021 The GPflux Contributors. +# Copyright (c) 2022 The GPflux Contributors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,13 +27,13 @@ from gpflow import Parameter, default_float from gpflow.base import TensorType -from gpflow.conditionals import conditional from gpflow.inducing_variables import MultioutputInducingVariables from gpflow.kernels import MultioutputKernel from gpflow.kullback_leiblers import prior_kl from gpflow.mean_functions import Identity, MeanFunction from gpflow.utilities.bijectors import triangular +from gpflux.conditionals import conditional from gpflux.exceptions import GPLayerIncompatibilityException from gpflux.math import _cholesky_with_jitter from gpflux.runtime_checks import verify_compatibility diff --git a/gpflux/layers/orth_gp_layer.py b/gpflux/layers/orth_gp_layer.py new file mode 100644 index 00000000..7a46d5fb --- /dev/null +++ b/gpflux/layers/orth_gp_layer.py @@ -0,0 +1,263 @@ +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class OrthGPLayer(GPLayer):
    """
    A sparse orthogonal variational multioutput GP layer. This layer holds the kernel,
    two sets of inducing variables (``u`` and ``v``) with one variational
    distribution each, and the mean function.
    """

    q_mu_u: Parameter
    """
    The variational mean associated with :attr:`inducing_variable_u`,
    with shape ``[num_inducing_u, num_latent_gps]``.
    """

    q_mu_v: Parameter
    """
    The variational mean associated with :attr:`inducing_variable_v`,
    with shape ``[num_inducing_v, num_latent_gps]``.
    """

    q_sqrt_u: Parameter
    """
    The lower-triangular factor of the variational covariance associated with
    :attr:`inducing_variable_u`, with shape
    ``[num_latent_gps, num_inducing_u, num_inducing_u]``.
    """

    q_sqrt_v: Parameter
    """
    The lower-triangular factor of the variational covariance associated with
    :attr:`inducing_variable_v`, with shape
    ``[num_latent_gps, num_inducing_v, num_inducing_v]``.
    """

    def __init__(
        self,
        kernel: MultioutputKernel,
        inducing_variable_u: MultioutputInducingVariables,
        inducing_variable_v: MultioutputInducingVariables,
        num_data: int,
        mean_function: Optional[MeanFunction] = None,
        *,
        num_samples: Optional[int] = None,
        full_cov: bool = False,
        full_output_cov: bool = False,
        num_latent_gps: Optional[int] = None,
        whiten: bool = True,
        name: Optional[str] = None,
        verbose: bool = True,
    ):
        """
        :param kernel: The multioutput kernel for this layer.
        :param inducing_variable_u: The first set of inducing variables. It is
            also the set handed to the parent :class:`GPLayer` constructor.
        :param inducing_variable_v: The second set of inducing variables.
        :param num_data: The number of points in the training dataset (see :attr:`num_data`).
        :param mean_function: The mean function that will be applied to the
            inputs. Default: :class:`~gpflow.mean_functions.Identity`.

            .. note:: The Identity mean function requires the input and output
                dimensionality of this layer to be the same. If you want to
                change the dimensionality in a layer, you may want to provide a
                :class:`~gpflow.mean_functions.Linear` mean function instead.

        :param num_samples: The number of samples to draw when converting the
            :class:`~tfp.layers.DistributionLambda` into a `tf.Tensor`, see
            :meth:`_convert_to_tensor_fn`. Will be stored in the
            :attr:`num_samples` attribute. If `None` (the default), draw a
            single sample without prefixing the sample shape (see
            :class:`tfp.distributions.Distribution`'s `sample()` method).
        :param full_cov: Sets default behaviour of calling this layer
            (:attr:`full_cov` attribute):
            If `False` (the default), only predict marginals (diagonal
            of covariance) with respect to inputs.
            If `True`, predict full covariance over inputs.
        :param full_output_cov: Sets default behaviour of calling this layer
            (:attr:`full_output_cov` attribute):
            If `False` (the default), only predict marginals (diagonal
            of covariance) with respect to outputs.
            If `True`, predict full covariance over outputs.
        :param num_latent_gps: The number of (latent) GPs in the layer
            (which can be different from the number of outputs, e.g. with a
            :class:`~gpflow.kernels.LinearCoregionalization` kernel).
            This is used to determine the size of the variational parameters
            :attr:`q_mu_u`, :attr:`q_mu_v`, :attr:`q_sqrt_u` and :attr:`q_sqrt_v`.
        :param whiten: If `True` (the default), uses the whitened parameterisation
            of the inducing variables; see :attr:`whiten`.
        :param name: The name of this layer.
        :param verbose: The verbosity mode. Set this parameter to `True`
            to show debug information.
        """

        super().__init__(
            kernel=kernel,
            inducing_variable=inducing_variable_u,
            num_data=num_data,
            mean_function=mean_function,
            num_samples=num_samples,
            full_cov=full_cov,
            full_output_cov=full_output_cov,
            num_latent_gps=num_latent_gps,
            whiten=whiten,
            name=name,
            verbose=verbose,
        )

        self.inducing_variable_u = inducing_variable_u
        self.inducing_variable_v = inducing_variable_v

        num_inducing_u = self.inducing_variable_u.num_inducing
        num_inducing_v = self.inducing_variable_v.num_inducing

        # `self.num_latent_gps` is not assigned in this class; presumably it is
        # set by `GPLayer.__init__` above — TODO confirm.

        # Variational parameters for q(u): randomly initialised mean and
        # identity lower-triangular factors.
        self.q_mu_u = Parameter(
            np.random.uniform(-0.5, 0.5, (num_inducing_u, self.num_latent_gps)),
            dtype=default_float(),
            name=f"{self.name}_q_mu_u" if self.name else "q_mu_u",
        )  # [num_inducing_u, num_latent_gps]

        self.q_sqrt_u = Parameter(
            np.stack([np.eye(num_inducing_u) for _ in range(self.num_latent_gps)]),
            transform=triangular(),
            dtype=default_float(),
            name=f"{self.name}_q_sqrt_u" if self.name else "q_sqrt_u",
        )  # [num_latent_gps, num_inducing_u, num_inducing_u]

        # Variational parameters for q(v), built the same way.
        self.q_mu_v = Parameter(
            np.random.uniform(-0.5, 0.5, (num_inducing_v, self.num_latent_gps)),
            dtype=default_float(),
            name=f"{self.name}_q_mu_v" if self.name else "q_mu_v",
        )  # [num_inducing_v, num_latent_gps]

        self.q_sqrt_v = Parameter(
            np.stack([np.eye(num_inducing_v) for _ in range(self.num_latent_gps)]),
            transform=triangular(),
            dtype=default_float(),
            name=f"{self.name}_q_sqrt_v" if self.name else "q_sqrt_v",
        )  # [num_latent_gps, num_inducing_v, num_inducing_v]

        self.num_samples = num_samples

    def predict(
        self,
        inputs: TensorType,
        *,
        full_cov: bool = False,
        full_output_cov: bool = False,
    ) -> Tuple[tf.Tensor, tf.Tensor]:
        """
        Make a prediction at N test inputs for the Q outputs of this layer,
        including the mean function contribution.

        The covariance and its shape is determined by *full_cov* and *full_output_cov* as follows:

        +--------------------+---------------------------+--------------------------+
        | (co)variance shape | ``full_output_cov=False`` | ``full_output_cov=True`` |
        +--------------------+---------------------------+--------------------------+
        | ``full_cov=False`` | [N, Q]                    | [N, Q, Q]                |
        +--------------------+---------------------------+--------------------------+
        | ``full_cov=True``  | [Q, N, N]                 | [N, Q, N, Q]             |
        +--------------------+---------------------------+--------------------------+

        :param inputs: The inputs to predict at, with a shape of [N, D], where D is
            the input dimensionality of this layer.
        :param full_cov: Whether to return full covariance (if `True`) or
            marginal variance (if `False`, the default) w.r.t. inputs.
        :param full_output_cov: Whether to return full covariance (if `True`)
            or marginal variance (if `False`, the default) w.r.t. outputs.

        :returns: posterior mean (shape [N, Q]) and (co)variance (shape as above) at test points
        """
        mean_function = self.mean_function(inputs)
        # NOTE(review): this call passes two inducing-variable sets and two
        # variational means positionally. Confirm that the `conditional`
        # dispatcher imported by this module has an implementation registered
        # for this orthogonal form.
        mean_cond, cov = conditional(
            inputs,
            self.inducing_variable_u,
            self.inducing_variable_v,
            self.kernel,
            self.q_mu_u,
            self.q_mu_v,
            q_sqrt_u=self.q_sqrt_u,
            q_sqrt_v=self.q_sqrt_v,
            full_cov=full_cov,
            full_output_cov=full_output_cov,
            white=self.whiten,
        )

        return mean_cond + mean_function, cov

    def prior_kl(self) -> tf.Tensor:
        """
        Returns the sum of two KL terms: one computed from
        :attr:`inducing_variable_u`, :attr:`q_mu_u` and :attr:`q_sqrt_u`, and one
        computed from :attr:`inducing_variable_v`, :attr:`q_mu_v` and
        :attr:`q_sqrt_v`. Both use this layer's kernel and :attr:`whiten` setting.
        """
        kl_u = prior_kl(
            self.inducing_variable_u, self.kernel, self.q_mu_u, self.q_sqrt_u, whiten=self.whiten
        )
        # NOTE(review): the v-term is evaluated against the plain kernel prior
        # of `inducing_variable_v`. Confirm this is the intended prior for the
        # orthogonal component when `whiten` is False.
        kl_v = prior_kl(
            self.inducing_variable_v, self.kernel, self.q_mu_v, self.q_sqrt_v, whiten=self.whiten
        )
        return kl_u + kl_v
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from typing import Optional, Tuple, Type, Union + +import tensorflow as tf + +from gpflow import kernels +from gpflow.base import MeanAndVariance, TensorType +from gpflow.conditionals.util import expand_independent_outputs +from gpflow.config import default_jitter +from gpflow.covariances import Kuf, Kuu +from gpflow.experimental.check_shapes import check_shapes +from gpflow.inducing_variables import ( + InducingVariables, + SeparateIndependentInducingVariables, + SharedIndependentInducingVariables, +) +from gpflow.kernels import Kernel, SeparateIndependent, SharedIndependent +from gpflow.mean_functions import MeanFunction +from gpflow.posteriors import ( + AbstractPosterior, + PrecomputedValue, + _DeltaDist, + _DiagNormal, + _MvNormal, + get_posterior_class, +) + +from gpflux.conditionals.util import ( # expand_independent_outputs, duplicate of gpflow? 
+ base_orthogonal_conditional, + separate_independent_orthogonal_conditional_implementation, +) +from gpflux.covariances import Cvf, Cvv + +""" +#NOTE -- I don't think we need this +def create_posterior( + kernel: Kernel, + inducing_variable: InducingVariables, + q_mu: TensorType, + q_sqrt: TensorType, + whiten: bool, + mean_function: Optional[MeanFunction] = None, + precompute_cache: Union[PrecomputeCacheType, str, None] = PrecomputeCacheType.TENSOR, +) -> BasePosterior: + posterior_class = get_posterior_class(kernel, inducing_variable) + precompute_cache = _validate_precompute_cache_type(precompute_cache) + return posterior_class( # type: ignore + kernel, + inducing_variable, + q_mu, + q_sqrt, + whiten, + mean_function, + precompute_cache=precompute_cache, + ) +""" + + +class AbstractOrthogonalPosterior(AbstractPosterior): + def __init__( + self, + kernel: Kernel, + inducing_variable_u: Union[tf.Tensor, InducingVariables], + inducing_variable_v: Union[tf.Tensor, InducingVariables], + cache: Optional[Tuple[tf.Tensor, ...]] = None, + mean_function: Optional[MeanFunction] = None, + ) -> None: + """ + TODO -- add documentation here + """ + super().__init__(kernel, inducing_variable_u, mean_function=mean_function) + + self.kernel = kernel + self.inducing_variable_u = inducing_variable_u + self.inducing_variable_v = inducing_variable_v + self.cache = cache + self.mean_function = mean_function + + +class BaseOrthogonalPosterior(AbstractOrthogonalPosterior): + # TODO -- introduce a suitable check_shapes here + # @check_shapes( + # "inducing_variable_u: [M, D, broadcast P]", + # "q_mu_u: [N, P]", + # "q_sqrt_U: [N_P_or_P_N_N...]", + # ) + def __init__( + self, + kernel: Kernel, + inducing_variable_u: InducingVariables, + inducing_variable_v: InducingVariables, + q_mu_u: tf.Tensor, + q_mu_v: tf.Tensor, + q_sqrt_u: tf.Tensor, + q_sqrt_v: tf.Tensor, + whiten: bool = True, + mean_function: Optional[MeanFunction] = None, + ): + + super().__init__( + kernel, 
inducing_variable_u, inducing_variable_v, mean_function=mean_function + ) + self.whiten = whiten + self._set_qdist(q_mu_u, q_sqrt_u, q_mu_v, q_sqrt_v) + + @property + def q_mu_u(self) -> tf.Tensor: + return self._q_dist_u.q_mu + + @property + def q_mu_v(self) -> tf.Tensor: + return self._q_dist_v.q_mu + + @property + def q_sqrt_u(self) -> tf.Tensor: + return self._q_dist_u.q_sqrt + + @property + def q_sqrt_v(self) -> tf.Tensor: + return self._q_dist_v.q_sqrt + + def _set_qdist( + self, q_mu_u: TensorType, q_sqrt_u: TensorType, q_mu_v: TensorType, q_sqrt_v: TensorType + ) -> None: + + if q_sqrt_u is None: + self._q_dist_u = _DeltaDist(q_mu_u) + elif len(q_sqrt_u.shape) == 2: # q_diag + self._q_dist_u = _DiagNormal(q_mu_u, q_sqrt_u) + else: + self._q_dist_u = _MvNormal(q_mu_u, q_sqrt_u) + + if q_sqrt_v is None: + self._q_dist_v = _DeltaDist(q_mu_v) + elif len(q_sqrt_v.shape) == 2: # q_diag + self._q_dist_v = _DiagNormal(q_mu_v, q_sqrt_v) + else: + self._q_dist_v = _MvNormal(q_mu_v, q_sqrt_v) + + def _precompute(self) -> Tuple[PrecomputedValue, ...]: + """ + #TODO -- needs to be implemented + """ + raise NotImplementedError + + +class IndependentOrthogonalPosterior(BaseOrthogonalPosterior): + @check_shapes( + "mean: [batch..., N, P]", + "cov: [batch..., P, N, N] if full_cov", + "cov: [batch..., N, P] if not full_cov", + "return[0]: [batch..., N, P]", + "return[1]: [batch..., N, P, N, P] if full_cov and full_output_cov", + "return[1]: [batch..., N, P, P] if (not full_cov) and full_output_cov", + "return[1]: [batch..., P, N, N] if full_cov and (not full_output_cov)", + "return[1]: [batch..., N, P] if (not full_cov) and (not full_output_cov)", + ) + def _post_process_mean_and_cov( + self, mean: TensorType, cov: TensorType, full_cov: bool, full_output_cov: bool + ) -> MeanAndVariance: + return mean, expand_independent_outputs(cov, full_cov, full_output_cov) + + @check_shapes( + "Xnew: [N, D]", + "return: [broadcast P, N, N] if full_cov", + "return: [broadcast P, N] if 
(not full_cov)",
+    )
+    def _get_Kff(self, Xnew: TensorType, full_cov: bool) -> tf.Tensor:
+        """
+        Evaluate the prior covariance of ``self.kernel`` at *Xnew*.
+
+        For a ``SeparateIndependent`` kernel one slice per sub-kernel is stacked along a new
+        leading axis; for any other multi-output kernel the wrapped ``kernel.kernel`` is
+        evaluated once; a plain kernel is called directly.
+        """
+
+        # TODO: this assumes that Xnew has shape [N, D] and no leading dims
+
+        if isinstance(self.kernel, kernels.SeparateIndependent):
+            # NOTE calling kernel(Xnew, full_cov=full_cov, full_output_cov=False) directly would
+            # return
+            # if full_cov: [P, N, N] -- this is what we want
+            # else: [N, P] instead of [P, N] as we get from the explicit stack below
+            Kff = tf.stack([k(Xnew, full_cov=full_cov) for k in self.kernel.kernels], axis=0)
+        elif isinstance(self.kernel, kernels.MultioutputKernel):
+            # effectively, SharedIndependent path
+            Kff = self.kernel.kernel(Xnew, full_cov=full_cov)
+            # NOTE calling kernel(Xnew, full_cov=full_cov, full_output_cov=False) directly would
+            # return
+            # if full_cov: [P, N, N] instead of [N, N]
+            # else: [N, P] instead of [N]
+        else:
+            # standard ("single-output") kernels
+            Kff = self.kernel(Xnew, full_cov=full_cov)  # [N, N] if full_cov else [N]
+
+        return Kff
+
+    # TODO -- check_shapes has to be updated
+    # @check_shapes(
+    #     "Xnew: [N, D]",
+    #     "return: [N, N] if full_cov",
+    #     "return: [N] if (not full_cov)",
+    # )
+    def _get_single_Cff(
+        self,
+        Xnew: TensorType,
+        kernel: Kernel,
+        inducing_variable_u: InducingVariables,
+        full_cov: bool,
+    ) -> Tuple[tf.Tensor, tf.Tensor]:
+        """
+        Compute the covariance of *kernel* at *Xnew* that remains after conditioning on
+        *inducing_variable_u*, i.e. ``Cff = Kff - Kfu Kuu⁻¹ Kuf``.
+
+        :param Xnew: Inputs to evaluate at.
+        :param kernel: The kernel to evaluate.
+        :param inducing_variable_u: Inducing variable paired with *kernel*.
+        :param full_cov: If ``True`` the full matrix is computed, otherwise only its diagonal.
+        :returns: The pair ``(Cff, L_Kmm)``, where ``L_Kmm`` is the Cholesky factor of ``Kuu``
+            (built with the default jitter), handed back so that callers can reuse it.
+        """
+
+        # TODO: this assumes that Xnew has shape [N, D] and no leading dims
+
+        Kff = kernel(Xnew, full_cov=full_cov)
+        # NOTE calling kernel(Xnew, full_cov=full_cov, full_output_cov=False) directly would
+        # return
+        # if full_cov: [P, N, N] instead of [N, N]
+        # else: [N, P] instead of [N]
+
+        Kmm = Kuu(inducing_variable_u, kernel, jitter=default_jitter())
+        L_Kmm = tf.linalg.cholesky(Kmm)
+
+        Kmf = Kuf(inducing_variable_u, kernel, Xnew)
+        L_Kmm_inv_Kmf = tf.linalg.triangular_solve(L_Kmm, Kmf)
+
+        # compute the covariance due to the conditioning
+        if full_cov:
+            # TODO -- need to add broadcasting capability
+            Cff = Kff - tf.linalg.matmul(
+                L_Kmm_inv_Kmf, L_Kmm_inv_Kmf, transpose_a=True
+            )  # [..., N, N]
+            # num_func = tf.shape(self.q_mu_u)[-1]
+            # N = tf.shape(Kuf)[-1]
+            # cov_shape = [num_func, N, N]
+            # Cff = tf.broadcast_to(tf.expand_dims(Cff, -3), cov_shape)  # [..., R, N, N]
+        else:
+            # TODO -- need to add broadcasting capability
+            Cff = Kff - tf.reduce_sum(tf.square(L_Kmm_inv_Kmf), -2)  # [..., N]
+            # num_func = tf.shape(self.q_mu_u)[-1]
+            # N = tf.shape(Kuf)[-1]
+            # cov_shape = [num_func, N]  # [..., R, N]
+            # Cff = tf.broadcast_to(tf.expand_dims(Cff, -2), cov_shape)  # [..., R, N]
+
+        return Cff, L_Kmm
+
+    # TODO -- need to update check_shapes
+    # @check_shapes(
+    #     "Xnew: [N, D]",
+    #     "return: [broadcast P, N, N] if full_cov",
+    #     "return: [broadcast P, N] if (not full_cov)",
+    # )
+    def _get_Cff(self, Xnew: TensorType, full_cov: bool) -> Tuple[tf.Tensor, tf.Tensor]:
+        """
+        Dispatch :meth:`_get_single_Cff` over the structure of ``self.kernel``.
+
+        :param Xnew: Inputs to evaluate at.
+        :param full_cov: Forwarded to :meth:`_get_single_Cff`.
+        :returns: The pair ``(Cff, L_Kmm)``; for a ``SeparateIndependent`` kernel both entries
+            are stacked along a new leading axis, one slice per sub-kernel.
+        """
+
+        # TODO: this assumes that Xnew has shape [N, D] and no leading dims
+
+        if isinstance(self.kernel, SeparateIndependent):
+            # NOTE calling kernel(Xnew, full_cov=full_cov, full_output_cov=False) directly would
+            # return
+            # if full_cov: [P, N, N] -- this is what we want
+            # else: [N, P] instead of [P, N] as we get from the explicit stack below
+
+            # Evaluate every (kernel, inducing variable) pair exactly once and unzip the
+            # (Cff, L_Kmm) pairs, instead of factorising Kuu twice per output.
+            per_output = [
+                self._get_single_Cff(Xnew, k, ind_var, full_cov)
+                for k, ind_var in zip(
+                    self.kernel.kernels, self.inducing_variable_u.inducing_variable_list
+                )
+            ]
+            Cff = tf.stack([cff for cff, _ in per_output], axis=0)
+            L_Kmm = tf.stack([chol for _, chol in per_output], axis=0)
+
+        elif isinstance(self.kernel, kernels.MultioutputKernel):
+            # effectively, SharedIndependent path
+            Cff, L_Kmm = self._get_single_Cff(
+                Xnew, self.kernel.kernel, self.inducing_variable_u.inducing_variable, full_cov
+            )
+
+        else:
+            # standard ("single-output") kernels
+            Cff, L_Kmm = self._get_single_Cff(
+                Xnew, self.kernel, self.inducing_variable_u,
full_cov
+            )  # [N, N] if full_cov else [N]
+
+        return Cff, L_Kmm
+
+    def _conditional_with_precompute(
+        self,
+        cache: Tuple[tf.Tensor, ...],
+        Xnew: TensorType,
+        full_cov: bool = False,
+        full_output_cov: bool = False,
+    ) -> MeanAndVariance:
+        """
+        Not available yet: always raises :class:`NotImplementedError`.
+
+        #TODO -- need to implement this
+        """
+        raise NotImplementedError
+
+
+class IndependentOrthogonalPosteriorSingleOutput(IndependentOrthogonalPosterior):
+
+    # could almost be the same as IndependentPosteriorMultiOutput ...
+    # TODO -- @inherit_check_shapes results in an error atm
+    # @inherit_check_shapes
+    def _conditional_fused(
+        self, Xnew: TensorType, full_cov: bool = False, full_output_cov: bool = False
+    ) -> MeanAndVariance:
+        """
+        Compute the posterior mean and (co)variance at *Xnew* without a precomputed cache.
+
+        Builds the ``K`` terms (from ``Kuu``/``Kuf``) and the ``C`` terms (from ``Cvv``/``Cvf``),
+        feeds them to ``base_orthogonal_conditional`` and post-processes the result with
+        :meth:`_post_process_mean_and_cov`.
+        """
+        # same as IndependentPosteriorMultiOutput, Shared~/Shared~ branch, except for following
+        # line:
+
+        Knn = self._get_Kff(Xnew, full_cov=full_cov)
+        # ``_get_Cff`` returns ``(Cff, L_Kmm)``; only the covariance is needed here. Binding the
+        # whole tuple to ``Cnn`` would pass a tuple where a tensor is expected below.
+        Cnn, _ = self._get_Cff(Xnew, full_cov=full_cov)
+
+        Kmm = Kuu(self.inducing_variable_u, self.kernel, jitter=default_jitter())  # [M_u, M_u]
+        Kmn = Kuf(self.inducing_variable_u, self.kernel, Xnew)  # [M_U, N]
+
+        Cmm = Cvv(
+            self.inducing_variable_u, self.inducing_variable_v, self.kernel, jitter=default_jitter()
+        )  # [M_v, M_v]
+        Cmn = Cvf(self.inducing_variable_u, self.inducing_variable_v, self.kernel, Xnew)  # [M_v, N]
+
+        fmean, fvar = base_orthogonal_conditional(
+            Kmn,
+            Kmm,
+            Knn,
+            Cmn,
+            Cmm,
+            Cnn,
+            self.q_mu_u,
+            self.q_mu_v,
+            full_cov=full_cov,
+            q_sqrt_u=self.q_sqrt_u,
+            q_sqrt_v=self.q_sqrt_v,
+            white=self.whiten,
+        )  # [N, P], [P, N, N] or [N, P]
+
+        return self._post_process_mean_and_cov(fmean, fvar, full_cov, full_output_cov)
+
+
+class IndependentOrthogonalPosteriorMultiOutput(IndependentOrthogonalPosterior):
+    # TODO -- @inherit_check_shapes results in an error atm
+    # @inherit_check_shapes
+    def _conditional_fused(
+        self, Xnew: TensorType, full_cov: bool = False, full_output_cov: bool = False
+    ) -> MeanAndVariance:
+
+        if isinstance(self.inducing_variable_u, SharedIndependentInducingVariables) and isinstance(
+
self.kernel, SharedIndependent + ): + # same as IndependentPosteriorSingleOutput except for following line + + Knn = self._get_Kff(Xnew, full_cov=full_cov) + Cnn, L_Kuu = self._get_Cff(Xnew, full_cov=full_cov) + + Kmm = Kuu(self.inducing_variable_u, self.kernel, jitter=default_jitter()) # [M_u, M_u] + Kmn = Kuf(self.inducing_variable_u, self.kernel, Xnew) # [M_U, N] + + Cmm = Cvv( + self.inducing_variable_u, + self.inducing_variable_v, + self.kernel, + jitter=default_jitter(), + L_Kuu=L_Kuu, + ) # [M_v, M_v] + Cmn = Cvf( + self.inducing_variable_u, self.inducing_variable_v, self.kernel, Xnew, L_Kuu=L_Kuu + ) # [M_v, N] + + fmean, fvar = base_orthogonal_conditional( + Kmn, + Kmm, + Knn, + Cmn, + Cmm, + Cnn, + self.q_mu_u, + self.q_mu_v, + full_cov=full_cov, + q_sqrt_u=self.q_sqrt_u, + q_sqrt_v=self.q_sqrt_v, + white=self.whiten, + Lm=L_Kuu, + ) # [N, P], [P, N, N] or [N, P] + + else: + + # Following are: [P, M, M] - [P, M, N] - [P, N](x N) + Kmms = Kuu(self.X_data, self.kernel, jitter=default_jitter()) # [P, M, M] + Kmns = Kuf(self.X_data, self.kernel, Xnew) # [P, M, N] + Knns = self._get_Kff(Xnew, full_cov=full_cov) # [P, N](x N) + + Cnns, L_Kuus = self._get_Cff(Xnew, full_cov=full_cov) # [P, N](x N) + Cmms = Cvv( + self.inducing_variable_u, + self.inducing_variable_v, + self.kernel, + jitter=default_jitter(), + L_Kuu=L_Kuus, + ) # [P, M_v, M_v] + Cmns = Cvf( + self.inducing_variable_u, self.inducing_variable_v, self.kernel, Xnew, L_Kuu=L_Kuus + ) # [P, M_v, N] + + # TODO -- this fails in tests + fmean, fvar = separate_independent_orthogonal_conditional_implementation( + Kmns, + Kmms, + Knns, + Cmns, + Cmms, + Cnns, + self.q_mu_u, + self.q_mu_v, + full_cov=full_cov, + q_sqrt_u=self.q_sqrt_u, + q_sqrt_v=self.q_sqrt_v, + white=self.whiten, + Lms=L_Kuus, + ) + + return self._post_process_mean_and_cov(fmean, fvar, full_cov, full_output_cov) + + +@get_posterior_class.register(kernels.Kernel, InducingVariables, InducingVariables) +def _get_posterior_base_case( + kernel: 
Kernel, inducing_variable_u: InducingVariables, inducing_variable_v: InducingVariables +) -> Type[BaseOrthogonalPosterior]: + # independent single output + return IndependentOrthogonalPosteriorSingleOutput + + +@get_posterior_class.register( + (kernels.SharedIndependent, kernels.SeparateIndependent), + (SeparateIndependentInducingVariables, SharedIndependentInducingVariables), + (SeparateIndependentInducingVariables, SharedIndependentInducingVariables), +) +def _get_posterior_independent_mo( + kernel: Kernel, inducing_variable_u: InducingVariables, inducing_variable_v: InducingVariables +) -> Type[BaseOrthogonalPosterior]: + # independent multi-output + return IndependentOrthogonalPosteriorMultiOutput diff --git a/tests/experiment_support/__init__.py b/tests/experiment_support/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/experiment_support/test_ci_utils.py b/tests/experiment_support/test_ci_utils.py new file mode 100644 index 00000000..4ba9ecdf --- /dev/null +++ b/tests/experiment_support/test_ci_utils.py @@ -0,0 +1,76 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import os +from typing import Union + +import pytest + +from gpflux.experiment_support.ci_utils import ( + is_continuous_integration, + notebook_list, + notebook_niter, + notebook_range, +) + + +class CIEnviroment: + """Context manager to simulate a set up where a CI env variable is used""" + + def __init__(self, flag: Union[str, bool]) -> None: + """ + :param flag: the CI env variable value + """ + if isinstance(flag, str): + self._flag = False if flag == "" or flag == "0" else True + else: + self._flag = flag + + try: + self._ci = os.environ["CI"] + except KeyError: + self._ci = "false" + + def __enter__(self): + os.environ["CI"] = str(self._flag).lower() + + def __exit__(self, exc_type, exc_val, exc_tb): + os.environ["CI"] = self._ci + + +@pytest.mark.parametrize("ci", [True, False, "1", "0", ""]) +def test_is_continuous_integration(ci: bool) -> None: + with CIEnviroment(ci): + if isinstance(ci, str): + ci = False if ci == "" or ci == "0" else True + assert is_continuous_integration() == ci + + +@pytest.mark.parametrize("ci,niter", [(True, 2), (False, 10)]) +def test_notebook_niter(ci: bool, niter: int) -> None: + with CIEnviroment(ci): + assert notebook_niter(10) == niter + + +@pytest.mark.parametrize("ci,niter", [(True, 2), (False, 10)]) +def test_notebook_range(ci: bool, niter: int) -> None: + with CIEnviroment(ci): + assert notebook_range(10) == range(niter) + + +@pytest.mark.parametrize("ci,niter", [(True, 2), (False, 10)]) +def test_notebook_list(ci: bool, niter: int) -> None: + with CIEnviroment(ci): + assert list(range(niter)) == notebook_list(list(range(10))) diff --git a/tests/gpflux/architectures/test_config.py b/tests/gpflux/architectures/test_config.py new file mode 100644 index 00000000..fd422caf --- /dev/null +++ b/tests/gpflux/architectures/test_config.py @@ -0,0 +1,64 @@ +# +# Copyright (c) 2022 The GPflux Contributors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import Any
+
+import pytest
+import tensorflow_probability as tfp
+
+from gpflow.kernels import SquaredExponential
+
+from gpflux.architectures.config import (
+    GaussianLikelihoodConfig,
+    HeteroSkedasticLikelihoodConfig,
+    LikelihoodConfig,
+    ModelHyperParametersConfig,
+    StudenttLikelihoodConfig,
+)
+
+
+@pytest.fixture(
+    name="likelihood_config",
+    params=[
+        GaussianLikelihoodConfig(noise_variance=1e-3),
+        StudenttLikelihoodConfig(df=3, scale=1.0),
+        HeteroSkedasticLikelihoodConfig(distribution_class=tfp.distributions.Normal),
+        HeteroSkedasticLikelihoodConfig(distribution_class=tfp.distributions.StudentT),
+    ],
+)
+def _likelihood_config(request: Any) -> LikelihoodConfig:
+    return request.param
+
+
+def test_likelihood_create(likelihood_config: LikelihoodConfig) -> None:
+    try:
+        likelihood_config.create()
+    except Exception:  # not a bare ``except``: Ctrl-C / SystemExit must still abort the run
+        pytest.fail(f"Could not create likelihood with config: {type(likelihood_config)}")
+
+
+@pytest.mark.parametrize("num_layers, whiten", [(0, True), (-1, True), (2, False), (-1, False)])
+def test_hyperparameters_model_config__raises_with_invalid_parameters(
+    num_layers: int, whiten: bool
+) -> None:
+    with pytest.raises(AssertionError):
+        _ = ModelHyperParametersConfig(
+            num_layers=num_layers,
+            kernel=SquaredExponential,
+            likelihood=GaussianLikelihoodConfig(noise_variance=1e-2),
+            inner_layer_qsqrt_factor=1e-3,
+            whiten=whiten,
+            num_inducing=7,
+        )
diff --git
a/tests/gpflux/architectures/test_constant_input_dim_deep_gp.py b/tests/gpflux/architectures/test_constant_input_dim_deep_gp.py deleted file mode 100644 index 5a6d660a..00000000 --- a/tests/gpflux/architectures/test_constant_input_dim_deep_gp.py +++ /dev/null @@ -1,37 +0,0 @@ -import numpy as np -import pytest -import tensorflow as tf - -from gpflux.architectures import Config, build_constant_input_dim_deep_gp -from gpflux.helpers import make_dataclass_from_class - - -class DemoConfig: - num_inducing = 7 - inner_layer_qsqrt_factor = 1e-3 - between_layer_noise_variance = 1e-3 - likelihood_noise_variance = 1e-2 - whiten = True - - -@pytest.mark.parametrize("input_dim", [7]) -@pytest.mark.parametrize("num_layers", [3]) -def test_smoke_build_constant_input_dim_deep_gp(input_dim, num_layers): - config = make_dataclass_from_class(Config, DemoConfig) - X = np.random.randn(13, input_dim) - Y = np.random.randn(13, 1) - dgp = build_constant_input_dim_deep_gp(X, num_layers, config) - model_train = dgp.as_training_model() - model_train.compile("Adam") - model_train.fit((X, Y), epochs=1) - model_test = dgp.as_prediction_model() - _ = model_test(X) - - -@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.int32]) -def test_build_constant_input_dim_deep_gp_raises_on_incorrect_dtype(dtype): - config = make_dataclass_from_class(Config, DemoConfig) - X = np.random.randn(13, 2).astype(dtype) - - with pytest.raises(ValueError): - build_constant_input_dim_deep_gp(X, 2, config) diff --git a/tests/gpflux/architectures/test_factory.py b/tests/gpflux/architectures/test_factory.py new file mode 100644 index 00000000..ec5d1b6d --- /dev/null +++ b/tests/gpflux/architectures/test_factory.py @@ -0,0 +1,181 @@ +# +# Copyright (c) 2022 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from typing import Any, Type + +import numpy as np +import pytest +import tensorflow_probability as tfp + +from gpflow.kernels import Matern12, Matern32, Matern52, SquaredExponential, Stationary + +from gpflux.architectures.config import ( + GaussianLikelihoodConfig, + HeteroSkedasticLikelihoodConfig, + HyperParametersConfig, + ModelHyperParametersConfig, + OrthogonalModelHyperparametersConfig, + StudenttLikelihoodConfig, +) +from gpflux.architectures.factory import build_constant_input_dim_architecture, build_kernel + + +@pytest.fixture( + name="kernel_type", + params=[ + SquaredExponential, + Matern12, + Matern32, + Matern52, + ], +) +def _kernel_type(request: Any) -> Type[Stationary]: + return request.param + + +@pytest.fixture(name="is_last_layer", params=[False, True]) +def _is_last_layer(request: Any) -> bool: + return request.param + + +@pytest.mark.parametrize("input_dim", [-1, 0]) +def test_build_kernel__raises_with_invalid_input_dim(input_dim) -> None: + with pytest.raises(AssertionError): + build_kernel(input_dim, False, SquaredExponential) + + +def test_build_kernel(kernel_type: Type[Stationary], is_last_layer: bool) -> None: + kernel = build_kernel(3, is_last_layer, kernel_type) + + assert isinstance(kernel, kernel_type) + expected_variance = 1.0 if is_last_layer else 1e-6 + expected_lengthscales = [1.0] * 3 + + np.testing.assert_allclose(kernel.variance.numpy(), expected_variance) + np.testing.assert_allclose(kernel.lengthscales, expected_lengthscales) + + +MODEL_CONFIGS = [ + ModelHyperParametersConfig( + num_layers=3, + 
kernel=SquaredExponential, + likelihood=GaussianLikelihoodConfig(noise_variance=1e-2), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing=7, + ), + ModelHyperParametersConfig( + num_layers=3, + kernel=SquaredExponential, + likelihood=StudenttLikelihoodConfig(df=3, scale=1e-2), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing=7, + ), + pytest.param( + ModelHyperParametersConfig( + num_layers=3, + kernel=SquaredExponential, + likelihood=HeteroSkedasticLikelihoodConfig(), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing=7, + ), + marks=pytest.mark.xfail, + ), + pytest.param( + ModelHyperParametersConfig( + num_layers=3, + kernel=SquaredExponential, + likelihood=HeteroSkedasticLikelihoodConfig( + distribution_class=tfp.distributions.StudentT + ), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing=7, + ), + marks=pytest.mark.xfail, + ), + OrthogonalModelHyperparametersConfig( + num_layers=3, + kernel=SquaredExponential, + likelihood=GaussianLikelihoodConfig(noise_variance=1e-2), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing_u=7, + num_inducing_v=7, + ), + OrthogonalModelHyperparametersConfig( + num_layers=3, + kernel=SquaredExponential, + likelihood=StudenttLikelihoodConfig(df=3, scale=1e-2), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing_u=7, + num_inducing_v=7, + ), + pytest.param( + OrthogonalModelHyperparametersConfig( + num_layers=3, + kernel=SquaredExponential, + likelihood=HeteroSkedasticLikelihoodConfig(), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing_u=7, + num_inducing_v=7, + ), + marks=pytest.mark.xfail, + ), + pytest.param( + OrthogonalModelHyperparametersConfig( + num_layers=3, + kernel=SquaredExponential, + likelihood=HeteroSkedasticLikelihoodConfig( + distribution_class=tfp.distributions.StudentT + ), + inner_layer_qsqrt_factor=1e-3, + whiten=True, + num_inducing_u=7, + num_inducing_v=7, + ), + marks=pytest.mark.xfail, + ), +] + + 
+@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.int32]) +@pytest.mark.parametrize("model_config", MODEL_CONFIGS) +def test_build_constant_input_dim_architecture__raises_on_incorrect_dtype( + dtype, model_config: HyperParametersConfig +) -> None: + X = np.random.randn(13, 2).astype(dtype) + + with pytest.raises(ValueError): + build_constant_input_dim_architecture(model_config, X) + + +@pytest.mark.parametrize("model_config", MODEL_CONFIGS) +def test_build_constant_input_dim_architecture__does_not_smoke( + model_config: HyperParametersConfig, +) -> None: + X = np.random.randn(13, 2) + Y = np.random.randn(13, 1) + + model = build_constant_input_dim_architecture(model_config, X) + model_train = model.as_training_model() + model_train.compile("Adam") + model_train.fit((X, Y), epochs=1) + model_test = model.as_prediction_model() + _ = model_test(X) diff --git a/tests/gpflux/layers/test_likelihood_layer.py b/tests/gpflux/layers/test_likelihood_layer.py index a978aca2..465c7a28 100644 --- a/tests/gpflux/layers/test_likelihood_layer.py +++ b/tests/gpflux/layers/test_likelihood_layer.py @@ -18,7 +18,14 @@ import tensorflow as tf from gpflow.kernels import Matern52 -from gpflow.likelihoods import Bernoulli, Beta, Gaussian, Poisson +from gpflow.likelihoods import ( + Bernoulli, + Beta, + Gaussian, + HeteroskedasticTFPConditional, + Poisson, + StudentT, +) from gpflow.mean_functions import Zero from gpflux.helpers import construct_basic_inducing_variables, construct_basic_kernel @@ -26,7 +33,14 @@ from gpflux.layers.likelihood_layer import LikelihoodOutputs from gpflux.losses import LikelihoodLoss -TEST_GPFLOW_LIKELIHOODS = [Bernoulli, Beta, Gaussian, Poisson] +TEST_GPFLOW_LIKELIHOODS = [ + Bernoulli, + Beta, + Gaussian, + Poisson, + StudentT, + pytest.param(HeteroskedasticTFPConditional, marks=pytest.mark.xfail), +] def setup_gp_layer_and_data(num_inducing: int, **gp_layer_kwargs): diff --git a/tests/gpflux/layers/test_orth_gp_layer.py 
b/tests/gpflux/layers/test_orth_gp_layer.py new file mode 100644 index 00000000..e626510b --- /dev/null +++ b/tests/gpflux/layers/test_orth_gp_layer.py @@ -0,0 +1,201 @@ +# +# Copyright (c) 2021 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import numpy as np +import pytest +import tensorflow as tf +import tensorflow_probability as tfp + +from gpflow.kernels import RBF +from gpflow.mean_functions import Zero + +from gpflux.helpers import construct_basic_inducing_variables, construct_basic_kernel +from gpflux.layers import OrthGPLayer +from gpflux.types import unwrap_dist + + +def setup_orth_gp_layer_and_data(num_inducing_u: int, num_inducing_v: int, **gp_layer_kwargs): + input_dim = 30 + output_dim = 5 + num_data = 100 + data = make_data(input_dim, output_dim, num_data=num_data) + + kernel = construct_basic_kernel(RBF(), output_dim) + inducing_vars_u = construct_basic_inducing_variables(num_inducing_u, input_dim, output_dim) + inducing_vars_v = construct_basic_inducing_variables(num_inducing_v, input_dim, output_dim) + mean_function = Zero(output_dim) + + gp_layer = OrthGPLayer( + kernel, + inducing_vars_u, + inducing_vars_v, + num_data, + mean_function=mean_function, + **gp_layer_kwargs + ) + return gp_layer, data + + +def make_data(input_dim: int, output_dim: int, num_data: int): + lim = [0, 20] + sigma = 0.1 + + X = np.random.random(size=(num_data, input_dim)) * lim[1] + cov = RBF().K(X) + np.eye(num_data) * sigma ** 2 + Y = 
[np.random.multivariate_normal(np.zeros(num_data), cov)[:, None] for _ in range(output_dim)] + Y = np.hstack(Y) + return X, Y + + +def test_build(): + num_inducing_u = 5 + num_inducing_v = 10 + + gp_layer, (X, Y) = setup_orth_gp_layer_and_data(num_inducing_u, num_inducing_v) + output_dim = Y.shape[-1] + + gp_layer.build(X.shape) + assert gp_layer.q_mu_u.shape == (num_inducing_u, output_dim) + assert gp_layer.q_sqrt_u.shape == (output_dim, num_inducing_u, num_inducing_u) + + assert gp_layer.q_mu_v.shape == (num_inducing_v, output_dim) + assert gp_layer.q_sqrt_v.shape == (output_dim, num_inducing_v, num_inducing_v) + + +def test_kl_change_q_mean(): + gp_layer, (X, Y) = setup_orth_gp_layer_and_data(num_inducing_u=5, num_inducing_v=5) + + gp_layer.build(X.shape) + assert gp_layer.prior_kl() > 0.0 + + q_mu_u_random = np.random.random(size=gp_layer.q_mu_u.shape) + gp_layer.q_mu_u.assign(q_mu_u_random) + + q_mu_v_random = np.random.random(size=gp_layer.q_mu_v.shape) + gp_layer.q_mu_v.assign(q_mu_v_random) + + assert gp_layer.prior_kl() > 0.0 + + +def test_kl_change_q_sqrt(): + gp_layer, (X, Y) = setup_orth_gp_layer_and_data(num_inducing_u=5, num_inducing_v=5) + + gp_layer.build(X.shape) + assert gp_layer.prior_kl() > 0.0 + + gp_layer.q_sqrt_u.assign(gp_layer.q_sqrt_u * 3) + gp_layer.q_sqrt_v.assign(gp_layer.q_sqrt_v * 3) + assert gp_layer.prior_kl() > 0.0 + + +def test_call_shapes(): + gp_layer, (X, Y) = setup_orth_gp_layer_and_data(num_inducing_u=5, num_inducing_v=5) + gp_layer.build(X.shape) + + output_dim = Y.shape[-1] + batch_size = X.shape[0] + + samples = tf.convert_to_tensor(gp_layer(X, training=False)) + assert samples.shape == (batch_size, output_dim) + + assert not gp_layer.full_cov and not gp_layer.full_output_cov + + distribution = gp_layer(X, training=False) + assert isinstance(unwrap_dist(distribution), tfp.distributions.MultivariateNormalDiag) + assert distribution.shape == (batch_size, output_dim) + + gp_layer.full_cov = True + distribution = gp_layer(X, 
training=False) + assert isinstance(unwrap_dist(distribution), tfp.distributions.MultivariateNormalTriL) + assert distribution.shape == (batch_size, output_dim) + assert distribution.covariance().shape == (output_dim, batch_size, batch_size) + + gp_layer.full_output_cov = True + gp_layer.full_cov = False + distribution = gp_layer(X, training=False) + assert isinstance(unwrap_dist(distribution), tfp.distributions.MultivariateNormalTriL) + assert distribution.shape == (batch_size, output_dim) + assert distribution.covariance().shape == (batch_size, output_dim, output_dim) + + gp_layer.full_output_cov = True + gp_layer.full_cov = True + with pytest.raises(NotImplementedError): + gp_layer(X) + + +def test_call_shapes_num_samples(): + num_samples = 10 + gp_layer, (X, Y) = setup_orth_gp_layer_and_data( + num_inducing_u=5, num_inducing_v=5, num_samples=num_samples + ) + gp_layer.build(X.shape) + + output_dim = Y.shape[-1] + batch_size = X.shape[0] + + samples = tf.convert_to_tensor(gp_layer(X, training=False)) + assert samples.shape == (num_samples, batch_size, output_dim) + + gp_layer.full_cov = True + samples = tf.convert_to_tensor(gp_layer(X, training=False)) + assert samples.shape == (num_samples, batch_size, output_dim) + + +def test_predict_shapes(): + gp_layer, (X, Y) = setup_orth_gp_layer_and_data(num_inducing_u=5, num_inducing_v=5) + gp_layer.build(X.shape) + + output_dim = Y.shape[-1] + batch_size = X.shape[0] + + mean, cov = gp_layer.predict(X) + assert mean.shape == (batch_size, output_dim) + assert cov.shape == (batch_size, output_dim) + + mean, cov = gp_layer.predict(X, full_cov=True) + assert mean.shape == (batch_size, output_dim) + assert cov.shape == (output_dim, batch_size, batch_size) + + mean, cov = gp_layer.predict(X, full_output_cov=True) + assert mean.shape == (batch_size, output_dim) + assert cov.shape == (batch_size, output_dim, output_dim) + + +def test_losses_are_added(): + gp_layer, (X, Y) = setup_orth_gp_layer_and_data(num_inducing_u=5, 
num_inducing_v=5) + gp_layer.build(X.shape) + + # to make KL non-zero + gp_layer.q_mu_u.assign(tf.ones_like(gp_layer.q_mu_u) * 3) + assert gp_layer.prior_kl() > 0.0 + + assert len(gp_layer.losses) == 0 + + _ = gp_layer(X, training=True) + assert gp_layer.losses == [gp_layer.prior_kl() / gp_layer.num_data] + + # Check loss is 0 when training is False + _ = gp_layer(X, training=False) + assert gp_layer.losses == [tf.zeros_like(gp_layer.losses[0])] + + # Check calling multiple times only adds one loss + _ = gp_layer(X, training=True) + _ = gp_layer(X, training=True) + assert len(gp_layer.losses) == 1 + assert gp_layer.losses == [gp_layer.prior_kl() / gp_layer.num_data] + + +if __name__ == "__main__": + test_call_shapes() diff --git a/tests/gpflux/models/test_deep_gp.py b/tests/gpflux/models/test_deep_gp.py index b5fc8a42..a2c361d8 100644 --- a/tests/gpflux/models/test_deep_gp.py +++ b/tests/gpflux/models/test_deep_gp.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from typing import Tuple + import numpy as np import pytest import tensorflow as tf @@ -23,14 +25,32 @@ from gpflow.mean_functions import Zero from gpflux.helpers import construct_basic_inducing_variables, construct_basic_kernel -from gpflux.layers import GPLayer, LikelihoodLayer -from gpflux.models import DeepGP +from gpflux.layers import GPLayer, OrthGPLayer +from gpflux.models import DeepGP, OrthDeepGP MAXITER = int(80e3) PLOTTER_INTERVAL = 60 +Dataset = Tuple[np.ndarray, np.ndarray] + + +@pytest.fixture(name="dataset", scope="module") +def _dataset() -> Dataset: + input_dim = 2 + num_data = 1000 + + lim = [0, 100] + kernel = RBF(lengthscales=20) + sigma = 0.01 + X = np.random.random(size=(num_data, input_dim)) * lim[1] + cov = kernel.K(X) + np.eye(num_data) * sigma ** 2 + Y = np.random.multivariate_normal(np.zeros(num_data), cov)[:, None] + Y = np.clip(Y, -0.5, 0.5) + + return X, Y -def build_deep_gp(input_dim, num_data): + +def build_deep_gp(input_dim, num_data) -> DeepGP: layers = [input_dim, 2, 2, 1] # Below are different ways to build layers @@ -62,7 +82,54 @@ def build_deep_gp(input_dim, num_data): return DeepGP(gp_layers, Gaussian(0.1)) -def train_deep_gp(deep_gp, data, maxiter=MAXITER, plotter=None, plotter_interval=PLOTTER_INTERVAL): +def build_orth_deep_gp(input_dim, num_data) -> OrthDeepGP: + layers = [input_dim, 2, 2, 1] + # Below are different ways to build layers + + # 1. Pass in Lists: + kernel_list = [RBF(), Matern12()] + num_inducing_u = [25, 25] + num_inducing_v = [10, 10] + l1_kernel = construct_basic_kernel(kernels=kernel_list) + l1_inducing_u = construct_basic_inducing_variables( + num_inducing=num_inducing_u, input_dim=layers[0] + ) + l1_inducing_v = construct_basic_inducing_variables( + num_inducing=num_inducing_v, input_dim=layers[0] + ) + + # 2. 
Pass in kernels, specify output dims (shared hyperparams/variables) + l2_kernel = construct_basic_kernel(kernels=RBF(), output_dim=layers[2], share_hyperparams=True) + l2_inducing_u = construct_basic_inducing_variables( + num_inducing=25, input_dim=layers[1], share_variables=True + ) + + l2_inducing_v = construct_basic_inducing_variables( + num_inducing=10, input_dim=layers[1], share_variables=True + ) + + # 3. Pass in kernels, specify output dims (independent hyperparams/vars) + # By default and the constructor will make indep. copies + l3_kernel = construct_basic_kernel(kernels=RBF(), output_dim=layers[3]) + l3_inducing_u = construct_basic_inducing_variables( + num_inducing=25, input_dim=layers[2], output_dim=layers[3] + ) + l3_inducing_v = construct_basic_inducing_variables( + num_inducing=10, input_dim=layers[2], output_dim=layers[3] + ) + + # Assemble at the end + gp_layers = [ + OrthGPLayer(l1_kernel, l1_inducing_u, l1_inducing_v, num_data), + OrthGPLayer(l2_kernel, l2_inducing_u, l2_inducing_v, num_data), + OrthGPLayer(l3_kernel, l3_inducing_u, l3_inducing_v, num_data, mean_function=Zero()), + ] + return OrthDeepGP(gp_layers, Gaussian(0.1)) + + +def train_deep_gp( + deep_gp, data, maxiter=MAXITER, plotter=None, plotter_interval=PLOTTER_INTERVAL +) -> None: optimizer = tf.optimizers.Adam() @tf.function(autograph=False) @@ -82,17 +149,6 @@ def step(): plotter() -def setup_dataset(input_dim: int, num_data: int, dtype: np.dtype = np.float64): - lim = [0, 100] - kernel = RBF(lengthscales=20) - sigma = 0.01 - X = np.random.random(size=(num_data, input_dim)) * lim[1] - cov = kernel.K(X) + np.eye(num_data) * sigma ** 2 - Y = np.random.multivariate_normal(np.zeros(num_data), cov)[:, None] - Y = np.clip(Y, -0.5, 0.5) - return X.astype(dtype), Y.astype(dtype) - - def get_live_plotter(train_data, model): from matplotlib import pyplot as plt from mpl_toolkits import mplot3d @@ -133,11 +189,9 @@ def plotter(*args, **kwargs): return fig, plotter -def 
run_demo(maxiter=int(80e3), plotter_interval=60): - input_dim = 2 - num_data = 1000 - data = setup_dataset(input_dim, num_data) - deep_gp = build_deep_gp(input_dim, num_data) +def run_demo( + deep_gp: DeepGP, data: Dataset, maxiter: int = int(80e3), plotter_interval: int = 60 +) -> None: fig, plotter = get_live_plotter(data, deep_gp) train_deep_gp( deep_gp, @@ -148,19 +202,20 @@ def run_demo(maxiter=int(80e3), plotter_interval=60): ) -def test_smoke(): +@pytest.mark.parametrize("model", [build_deep_gp(2, 1000), build_orth_deep_gp(2, 1000)]) +def test_smoke(dataset: Dataset, model: DeepGP) -> None: import matplotlib matplotlib.use("PS") # Agg does not support 3D - run_demo(maxiter=2, plotter_interval=1) + run_demo(deep_gp=model, data=dataset, maxiter=2, plotter_interval=1) +@pytest.mark.parametrize("model", [build_deep_gp(2, 1000), build_orth_deep_gp(2, 1000)]) @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.int32]) -def test_deep_gp_raises_on_incorrect_dtype(dtype): - input_dim = 2 - num_data = 1000 - X, Y = setup_dataset(input_dim, num_data, dtype) - model = build_deep_gp(input_dim, num_data) +def test_deep_gp_raises_on_incorrect_dtype( + dataset: Dataset, model: DeepGP, dtype: np.dtype +) -> None: + X, Y = dataset[0].astype(dtype), dataset[1].astype(dtype) with pytest.raises(ValueError): model.predict_f(X) diff --git a/tests/gpflux/test_helpers.py b/tests/gpflux/test_helpers.py index 04517deb..3980b661 100644 --- a/tests/gpflux/test_helpers.py +++ b/tests/gpflux/test_helpers.py @@ -19,7 +19,6 @@ import pytest import gpflow -from gpflow import mean_functions from gpflow.inducing_variables import ( InducingPoints, MultioutputInducingVariables, @@ -142,14 +141,14 @@ def test_construct_inducing_shared_independent_duplicates(z_init): def test_construct_mean_function_Identity(): num_data, input_dim, output_dim = 11, 5, 5 X = np.random.randn(num_data, input_dim) - mean_functions = construct_mean_function(X, input_dim, output_dim) + mean_functions = 
construct_mean_function(X, output_dim) assert isinstance(mean_functions, gpflow.mean_functions.Identity) def test_construct_mean_function_Linear(): num_data, input_dim, output_dim = 11, 5, 7 X = np.random.randn(num_data, input_dim) - mean_functions = construct_mean_function(X, input_dim, output_dim) + mean_functions = construct_mean_function(X, output_dim) assert isinstance(mean_functions, gpflow.mean_functions.Linear)