Commit
Automated updates to slides.
schrimpf committed Dec 6, 2023
1 parent d88ea18 commit e07882c
Showing 6 changed files with 63 additions and 58 deletions.
2 changes: 1 addition & 1 deletion paul/feed.html
@@ -320,7 +320,7 @@ <h3 class="no-anchor listing-title">
</a>
</div>
</div>
<div class="quarto-post image-right" data-index="6" data-listing-file-modified-sort="1701877750289" data-listing-reading-time-sort="9">
<div class="quarto-post image-right" data-index="6" data-listing-file-modified-sort="1701894779411" data-listing-reading-time-sort="9">
<div class="thumbnail">
<p><a href="./neuralnets.html"> <div class="listing-item-img-placeholder card-img-top" >&nbsp;</div> </a></p>
</div>
32 changes: 18 additions & 14 deletions paul/neuralnets.html
@@ -455,8 +455,9 @@ <h2>Single Layer Perceptron</h2>
<li><span class="math inline">\(x_i \in \R^d\)</span>, want to approximate some <span class="math inline">\(f: \R^d \to \R\)</span></li>
<li>Approximate by <span class="math display">\[
\begin{align*}
f(x_i; \mathbf{w},\mathbf{b}) = \psi_1\left( \sum_{u=1}^m w_{u,1} \psi_0( x_i'w_{u,0} + b_{u,0}) + b_{u,1} \right)
\end{align}
f(x_i; \mathbf{w},\mathbf{b}) =
\psi_1 \left( \sum_{u=1}^m w_{u,1} \psi_0( x_i'w_{u,0} + b_{u,0}) + b_{u,1} \right)
\end{align*}
\]</span> where
<ul>
<li>Weights: <span class="math inline">\(w_{u,1} \in \R\)</span>, <span class="math inline">\(w_{u,0} \in \R^d\)</span></li>
@@ -468,7 +469,7 @@ <h2>Single Layer Perceptron</h2>
</section>
<section id="activation-functions" class="slide level2">
<h2>Activation functions</h2>
<div class="cell" data-execution_count="2">
<div class="cell" data-execution_count="1">
<details>
<summary>Code</summary>
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
@@ -497,7 +498,7 @@ <h2>Activation functions</h2>
</section>
<section id="single-layer-perceptron-1" class="slide level2">
<h2>Single Layer Perceptron</h2>
<div class="cell" data-execution_count="3">
<div class="cell" data-execution_count="2">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1"></a><span class="im">import</span> torch.nn <span class="im">as</span> nn</span>
<span id="cb2-2"><a href="#cb2-2"></a><span class="im">import</span> torch</span>
<span id="cb2-3"><a href="#cb2-3"></a><span class="kw">class</span> SingleLayerPerceptron(nn.Module):</span>
@@ -552,7 +553,7 @@ <h2>Computing Gradients</h2>
<li>E.g. <span class="math inline">\(\ell(\theta) = f(g(h(\theta)))\)</span>
<ul>
<li><span class="math inline">\(\ell : \R^p \to \R\)</span>, <span class="math inline">\(h: \R^p \to \R^q\)</span>, <span class="math inline">\(g: \R^q \to \R^j\)</span>, <span class="math inline">\(f: \R^j \to \R\)</span> <span class="math display">\[
\left(\nabla \ell(\theta)\riight)^T = &amp; \underbrace{Df_{g(h(\theta))}}_{1 \times j} \underbrace{Dg_{h(\theta)}}_{j \times q} \underbrace{Dh_\theta}_{q \times p}
\left(\nabla \ell(\theta)\riight)^T = \underbrace{Df_{g(h(\theta))}}_{1 \times j} \underbrace{Dg_{h(\theta)}}_{j \times q} \underbrace{Dh_\theta}_{q \times p}
\]</span></li>
</ul></li>
<li>Forward mode:
@@ -587,7 +588,7 @@ <h2>Computing Gradients</h2>
</section>
<section id="gradient-descent-1" class="slide level2">
<h2>Gradient Descent</h2>
<div class="cell" data-execution_count="4">
<div class="cell" data-execution_count="3">
<details>
<summary>Code</summary>
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1"></a>n <span class="op">=</span> <span class="dv">100</span></span>
@@ -628,7 +629,7 @@ <h2>Gradient Descent</h2>
</section>
<section id="multi-layer-perceptron" class="slide level2">
<h2>Multi Layer Perceptron</h2>
<div class="cell" data-execution_count="5">
<div class="cell" data-execution_count="4">
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw">def</span> multilayer(d,width,depth,activation<span class="op">=</span>nn.ReLU()):</span>
<span id="cb4-2"><a href="#cb4-2"></a> mlp <span class="op">=</span> nn.Sequential(</span>
<span id="cb4-3"><a href="#cb4-3"></a> nn.Linear(d,width),</span>
@@ -650,7 +651,9 @@ <h2>Multi Layer Perceptron</h2>
</section>
<section id="multi-layer-perceptron-1" class="slide level2">
<h2>Multi Layer Perceptron</h2>
<div class="cell" data-execution_count="6">
<div class="cell" data-execution_count="5">
<details>
<summary>Code</summary>
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1"></a>mlp <span class="op">=</span> multilayer(<span class="dv">1</span>,<span class="dv">4</span>,<span class="dv">4</span>,nn.ReLU())</span>
<span id="cb5-2"><a href="#cb5-2"></a><span class="bu">print</span>(count_parameters(mlp))</span>
<span id="cb5-3"><a href="#cb5-3"></a>optimizer <span class="op">=</span> torch.optim.Adam(mlp.parameters(), lr<span class="op">=</span><span class="fl">0.01</span>)</span>
Expand All @@ -669,6 +672,7 @@ <h2>Multi Layer Perceptron</h2>
<span id="cb5-16"><a href="#cb5-16"></a>ax.plot(xlin, f(xlin), label<span class="op">=</span><span class="st">"f"</span>, lw<span class="op">=</span><span class="dv">8</span>)</span>
<span id="cb5-17"><a href="#cb5-17"></a>ax.scatter(x.flatten(),y.flatten())</span>
<span id="cb5-18"><a href="#cb5-18"></a>fig.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</details>
<div class="cell-output cell-output-stdout">
<pre><code>73</code></pre>
</div>
@@ -696,7 +700,7 @@ <h2>Overparameterization</h2>
</section>
<section id="double-descent" class="slide level2">
<h2>Double Descent</h2>
<div class="cell" data-execution_count="7">
<div class="cell" data-execution_count="6">
<details>
<summary>Code</summary>
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1"></a><span class="im">from</span> joblib <span class="im">import</span> Parallel, delayed</span>
@@ -749,7 +753,7 @@ <h2>Double Descent</h2>
</section>
<section id="double-descent-1" class="slide level2">
<h2>Double Descent</h2>
<div class="cell" data-execution_count="8">
<div class="cell" data-execution_count="7">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1"></a>f <span class="op">=</span> <span class="kw">lambda</span> x: np.exp(x[<span class="dv">0</span>]<span class="op">-</span>x[<span class="dv">1</span>])</span>
<span id="cb8-2"><a href="#cb8-2"></a>n <span class="op">=</span> <span class="dv">20</span></span>
<span id="cb8-3"><a href="#cb8-3"></a>torch.manual_seed(<span class="dv">1234</span>)</span>
@@ -777,7 +781,7 @@ <h2>Double Descent</h2>
</section>
<section id="double-descent-2" class="slide level2">
<h2>Double Descent</h2>
<div class="cell" data-execution_count="9">
<div class="cell" data-execution_count="8">
<details>
<summary>Code</summary>
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1"></a><span class="kw">def</span> plotdd(losses, nonoise):</span>
@@ -818,7 +822,7 @@ <h2>Double Descent</h2>
</section>
<section id="double-descent-low-noise" class="slide level2">
<h2>Double Descent: Low Noise</h2>
<div class="cell" data-execution_count="10">
<div class="cell" data-execution_count="9">
<details>
<summary>Code</summary>
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1"></a>sigma <span class="op">=</span> <span class="fl">0.01</span></span>
@@ -839,14 +843,14 @@ <h2>Double Descent: Low Noise</h2>
width 9</code></pre>
</div>
</div>
<div class="cell" data-execution_count="11">
<div class="cell" data-execution_count="10">
<details>
<summary>Code</summary>
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1"></a>fig<span class="op">=</span>plotdd(ddlowsig[<span class="dv">0</span>].mean(axis<span class="op">=</span><span class="dv">2</span>), ddlowsig[<span class="dv">1</span>].mean(axis<span class="op">=</span><span class="dv">2</span>))</span>
<span id="cb13-2"><a href="#cb13-2"></a>fig.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</details>
<div class="cell-output cell-output-display">
<p><img data-src="neuralnets_files/figure-revealjs/cell-11-output-1.png" width="959" height="430"></p>
<p><img data-src="neuralnets_files/figure-revealjs/cell-11-output-1.png" width="950" height="430"></p>
</div>
</div>
</section>
81 changes: 41 additions & 40 deletions paul/neuralnets.ipynb

Large diffs are not rendered by default.

Binary file modified paul/neuralnets_files/figure-revealjs/cell-11-output-1.png
Binary file modified paul/neuralnets_files/figure-revealjs/cell-9-output-1.png
6 changes: 3 additions & 3 deletions paul/search.json
@@ -1327,7 +1327,7 @@
"href": "neuralnets.html#single-layer-perceptron",
"title": "Neural Networks",
"section": "Single Layer Perceptron",
"text": "Single Layer Perceptron\n\n\\(x_i \\in \\R^d\\), want to approximate some \\(f: \\R^d \\to \\R\\)\nApproximate by \\[\n\\begin{align*}\nf(x_i; \\mathbf{w},\\mathbf{b}) = \\psi_1\\left( \\sum_{u=1}^m w_{u,1} \\psi_0( x_i'w_{u,0} + b_{u,0}) + b_{u,1} \\right)\n\\end{align}\n\\] where\n\nWeights: \\(w_{u,1} \\in \\R\\), \\(w_{u,0} \\in \\R^d\\)\nBiases: \\(b_{u,1}, b_{u,0} \\in \\R\\)\nActivation functions \\(\\psi_1, \\psi_0: \\R \\to \\R\\)\nWidth: \\(m\\)"
"text": "Single Layer Perceptron\n\n\\(x_i \\in \\R^d\\), want to approximate some \\(f: \\R^d \\to \\R\\)\nApproximate by \\[\n\\begin{align*}\nf(x_i; \\mathbf{w},\\mathbf{b}) =\n\\psi_1 \\left( \\sum_{u=1}^m w_{u,1} \\psi_0( x_i'w_{u,0} + b_{u,0}) + b_{u,1} \\right)\n\\end{align*}\n\\] where\n\nWeights: \\(w_{u,1} \\in \\R\\), \\(w_{u,0} \\in \\R^d\\)\nBiases: \\(b_{u,1}, b_{u,0} \\in \\R\\)\nActivation functions \\(\\psi_1, \\psi_0: \\R \\to \\R\\)\nWidth: \\(m\\)"
},
{
"objectID": "neuralnets.html#activation-functions",
@@ -1362,7 +1362,7 @@
"href": "neuralnets.html#computing-gradients",
"title": "Neural Networks",
"section": "Computing Gradients",
"text": "Computing Gradients\n\nAutomatic differentiation: automatically use chainrule on each step of computation\nE.g. \\(\\ell(\\theta) = f(g(h(\\theta)))\\)\n\n\\(\\ell : \\R^p \\to \\R\\), \\(h: \\R^p \\to \\R^q\\), \\(g: \\R^q \\to \\R^j\\), \\(f: \\R^j \\to \\R\\) \\[\n\\left(\\nabla \\ell(\\theta)\\riight)^T = & \\underbrace{Df_{g(h(\\theta))}}_{1 \\times j} \\underbrace{Dg_{h(\\theta)}}_{j \\times q} \\underbrace{Dh_\\theta}_{q \\times p}\n\\]\n\nForward mode:\n\nCalculate \\(h(\\theta)\\) and \\(D_1=Dh_\\theta\\)\nCalculate \\(g(h(\\theta))\\) and \\(Dg_{h(\\theta)}\\), multiply \\(D_2=Dg_{h(\\theta)} D_1\\) (\\(jqp\\) scalar products and additions)\nCalculate \\(f(g(h(\\theta)))\\) and \\(Df_{g(h(\\theta))}\\), multiply \\(Df_{g(h(\\theta))} D_2\\) (\\(1jp\\) scalar products and additions)\n\n\nWork to propagate derivative \\(\\propto jqp + 1jp\\)"
"text": "Computing Gradients\n\nAutomatic differentiation: automatically use chainrule on each step of computation\nE.g. \\(\\ell(\\theta) = f(g(h(\\theta)))\\)\n\n\\(\\ell : \\R^p \\to \\R\\), \\(h: \\R^p \\to \\R^q\\), \\(g: \\R^q \\to \\R^j\\), \\(f: \\R^j \\to \\R\\) \\[\n\\left(\\nabla \\ell(\\theta)\\riight)^T = \\underbrace{Df_{g(h(\\theta))}}_{1 \\times j} \\underbrace{Dg_{h(\\theta)}}_{j \\times q} \\underbrace{Dh_\\theta}_{q \\times p}\n\\]\n\nForward mode:\n\nCalculate \\(h(\\theta)\\) and \\(D_1=Dh_\\theta\\)\nCalculate \\(g(h(\\theta))\\) and \\(Dg_{h(\\theta)}\\), multiply \\(D_2=Dg_{h(\\theta)} D_1\\) (\\(jqp\\) scalar products and additions)\nCalculate \\(f(g(h(\\theta)))\\) and \\(Df_{g(h(\\theta))}\\), multiply \\(Df_{g(h(\\theta))} D_2\\) (\\(1jp\\) scalar products and additions)\n\n\nWork to propagate derivative \\(\\propto jqp + 1jp\\)"
},
{
"objectID": "neuralnets.html#computing-gradients-1",
@@ -1390,7 +1390,7 @@
"href": "neuralnets.html#multi-layer-perceptron-1",
"title": "Neural Networks",
"section": "Multi Layer Perceptron",
"text": "Multi Layer Perceptron\n\nmlp = multilayer(1,4,4,nn.ReLU())\nprint(count_parameters(mlp))\noptimizer = torch.optim.Adam(mlp.parameters(), lr=0.01)\nfig, ax = plt.subplots()\nepochs = 1000\nfor i in range(epochs):\n if (i % 50) == 0 :\n ax.plot(xlin,mlp(xlin.reshape(-1,1)).data, alpha=i/epochs, color='k')\n mlp.zero_grad()\n loss = lossfn(y,mlp(x))\n #print(f\"{i}: {loss.item()}\")\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n\nax.plot(xlin, f(xlin), label=\"f\", lw=8)\nax.scatter(x.flatten(),y.flatten())\nfig.show()\n\n73"
"text": "Multi Layer Perceptron\n\n\nCode\nmlp = multilayer(1,4,4,nn.ReLU())\nprint(count_parameters(mlp))\noptimizer = torch.optim.Adam(mlp.parameters(), lr=0.01)\nfig, ax = plt.subplots()\nepochs = 1000\nfor i in range(epochs):\n if (i % 50) == 0 :\n ax.plot(xlin,mlp(xlin.reshape(-1,1)).data, alpha=i/epochs, color='k')\n mlp.zero_grad()\n loss = lossfn(y,mlp(x))\n #print(f\"{i}: {loss.item()}\")\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n\nax.plot(xlin, f(xlin), label=\"f\", lw=8)\nax.scatter(x.flatten(),y.flatten())\nfig.show()\n\n\n73"
},
{
"objectID": "neuralnets.html#overparameterization",
Expand Down

0 comments on commit e07882c

Please sign in to comment.