diff --git a/slides/04-multivariate-first-order/slides-multivar-first-order-12-comparison.tex b/slides/04-multivariate-first-order/slides-multivar-first-order-12-comparison.tex index c9e2c54..5924f00 100644 --- a/slides/04-multivariate-first-order/slides-multivar-first-order-12-comparison.tex +++ b/slides/04-multivariate-first-order/slides-multivar-first-order-12-comparison.tex @@ -43,8 +43,8 @@ \vspace{-0.4cm} GD with medium $\alpha=2\cdot10^{-3}$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_med_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_coef_med.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_reg_med_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_reg_coef_med.pdf}\\ \begin{footnotesize} Irreducible error due to additive noise is $\sigma=1$. Dotted lines indicate global minimizers. \end{footnotesize} @@ -56,8 +56,8 @@ \vspace{-0.4cm} GD with medium $\alpha=2\cdot10^{-3}$ and bad conditioning (corr. features): \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_med_lr_corr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_coef_med_corr.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_reg_med_lr_corr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_reg_coef_med_corr.pdf}\\ \begin{footnotesize} Irreducible error due to additive noise is $\sigma=1$. Dotted lines indicate global minimizers. \end{footnotesize} @@ -72,8 +72,8 @@ \vspace{-0.4cm} GD with (too small) $\alpha=3\cdot10^{-4}$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_small_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_coef_small.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_reg_small_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_reg_coef_small.pdf}\\ \begin{footnotesize} Irreducible error due to additive noise is $\sigma=1$. Dotted lines indicate global minimizers. \end{footnotesize} @@ -85,8 +85,8 @@ \vspace{-0.5cm} GD with large $\alpha=1.5$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_large_lr_iters.pdf} \\ - \includegraphics[width=0.7\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_reg_coef_large.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_reg_large_lr_iters.pdf} \\ + \includegraphics[width=0.7\textwidth]{figure_man/simu_linmod/GD_reg_coef_large.pdf}\\ \begin{footnotesize} Irreducible error due to additive noise is $\sigma=1$. Dotted lines indicate global minimizers. \end{footnotesize} @@ -101,8 +101,8 @@ \vspace{-0.4cm} SGD with medium $\alpha=2\cdot10^{-3}$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_med_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_coef_med.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_med_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_coef_med.pdf}\\ \begin{footnotesize} %Irreducible error due to additive noise is $\sigma=1$ \end{footnotesize} @@ -115,8 +115,8 @@ \vspace{-0.4cm} SGD with medium $\alpha=2\cdot10^{-3}$ and bad conditioning (corr. features): \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_med_lr_corr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_coef_med_corr.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_med_lr_corr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_coef_med_corr.pdf}\\ \begin{footnotesize} Irreducible error due to additive noise is $\sigma=1$ \end{footnotesize} @@ -130,8 +130,8 @@ \vspace{-0.4cm} SGD with small $\alpha=3\cdot10^{-4}$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_small_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_coef_small.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_small_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_coef_small.pdf}\\ \begin{footnotesize} Irreducible error due to additive noise is $\sigma=1$ \end{footnotesize} @@ -145,8 +145,8 @@ \vspace{-0.4cm} SGD with large $\alpha=1 \cdot 10^{-2}$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_large_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_reg_coef_large.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_large_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_reg_coef_large.pdf}\\ \begin{footnotesize} Irreducible error due to additive noise is $\sigma=1$ \end{footnotesize} @@ -246,8 +246,8 @@ \vspace{-0.4cm} GD with medium $\alpha=0.25$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_med_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_coef_med.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_med_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_coef_med.pdf}\\ \begin{footnotesize} Dotted lines indicate global minimizers. %Dashed line in test loss indicates irreducible error due to $\sigma=1$ @@ -261,8 +261,8 @@ \vspace{-0.4cm} GD with medium $\alpha=0.25$ and bad conditioning (corr. features): \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_med_lr_corr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_coef_med_corr.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_med_lr_corr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_coef_med_corr.pdf}\\ \begin{footnotesize} Dotted lines indicate global minimizers. %Dashed line in test loss indicates irreducible error due to $\sigma=1$ @@ -277,8 +277,8 @@ \vspace{-0.4cm} GD with small $\alpha=0.025$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_small_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_coef_small.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_small_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_coef_small.pdf}\\ \begin{footnotesize} %Dashed line in test loss indicates irreducible error due to $\sigma=1$ \end{footnotesize} @@ -292,8 +292,8 @@ \vspace{-0.5cm} GD with large $\alpha=10$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_large_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_coef_large.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_large_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_coef_large.pdf}\\ \begin{footnotesize} %Dashed line in test loss indicates irreducible error due to $\sigma=1$ \end{footnotesize} @@ -309,8 +309,8 @@ \vspace{-0.4cm} SGD with medium $\alpha=0.03$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_med_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_coef_med.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_med_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_coef_med.pdf}\\ \begin{footnotesize} %Dashed line in test loss indicates irreducible error due to $\sigma=1$ \end{footnotesize} @@ -323,8 +323,8 @@ \vspace{-0.4cm} SGD with medium $\alpha=5\cdot10^{-2}$ and bad conditioning (corr. features): \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_med_lr_corr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_coef_med_corr.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_med_lr_corr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_coef_med_corr.pdf}\\ \begin{footnotesize} %Dashed line in test loss indicates irreducible error due to $\sigma=1$ \end{footnotesize} @@ -338,8 +338,8 @@ \vspace{-0.4cm} SGD with small $\alpha=3\cdot10^{-4}$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_small_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_coef_small.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_small_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_coef_small.pdf}\\ \begin{footnotesize} %Dashed line in test loss indicates irreducible error due to $\sigma=1$ \end{footnotesize} @@ -353,8 +353,8 @@ \vspace{-0.4cm} SGD with large $\alpha=0.3$ and indep. features: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_large_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/SGD_log_coef_large.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_large_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/SGD_log_coef_large.pdf}\\ \begin{footnotesize} %Dashed line in test loss indicates irreducible error due to $\sigma=1$ \end{footnotesize} @@ -383,7 +383,7 @@ \vspace{-0.3cm} Mini-batch SGD with $\alpha \in \{0.01, 0.1\}$ for 100 epochs (5000 iterations): \begin{figure} - \includegraphics[width=1.0\textwidth]{slides/04-multivariate-first-order/figure_man/simu_mnist/SGD_compar.pdf} \\ + \includegraphics[width=1.0\textwidth]{figure_man/simu_mnist/SGD_compar.pdf} \\ %\begin{footnotesize} % Irreducible error due to additive noise is $\sigma=1$ %\end{footnotesize} @@ -404,7 +404,7 @@ \vspace{-0.2cm} Why is it not a good idea to use GD in most DL applications? SGD is much faster. Compare runtime of mini-batch SGD (batch size=$100$) with GD (constant $\alpha=0.01$ without momentum for $t_{\text{max}}=5000$ iterations): \begin{figure} - \includegraphics[width=1.0\textwidth]{slides/04-multivariate-first-order/figure_man/simu_mnist/SGD_GD_compar.pdf} \\ + \includegraphics[width=1.0\textwidth]{figure_man/simu_mnist/SGD_GD_compar.pdf} \\ %\begin{footnotesize} % Irreducible error due to additive noise is $\sigma=1$ %\end{footnotesize} diff --git a/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex b/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex index 59cdefe..68fc98d 100644 --- a/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex +++ b/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex @@ -89,8 +89,8 @@ \vspace{-0.3cm} Recall comparison of GD variants on log. reg. in last chapter: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_med_lr_iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/04-multivariate-first-order/figure_man/simu_linmod/GD_log_coef_med.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_med_lr_iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu_linmod/GD_log_coef_med.pdf}\\ \begin{footnotesize} Dotted lines indicate global minimizers. %Dashed line in test loss indicates irreducible error due to $\sigma=1$ @@ -105,8 +105,8 @@ \vspace{-0.4cm} Let's run GD vs. NR for \textbf{$1000$ steps} (independent features): \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_log_indep_1000iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_log_coef_1000indep.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu-newton/NR_GD_log_indep_1000iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu-newton/NR_GD_log_coef_1000indep.pdf}\\ \begin{footnotesize} Dotted lines indicate global minimizers. %Dashed line in test loss indicates irreducible error due to $\sigma=1$ @@ -120,8 +120,8 @@ \vspace{-0.4cm} Let's run the same configuration only for \textbf{$50$ steps} to see clearer picture: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_log_indep_50iters.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_log_coef_50indep.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu-newton/NR_GD_log_indep_50iters.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu-newton/NR_GD_log_coef_50indep.pdf}\\ \begin{footnotesize} Dotted lines indicate global minimizers. %Dashed line in test loss indicates irreducible error due to $\sigma=1$ @@ -136,7 +136,7 @@ {\small Clearly, NR makes more progress than GD per iteration. OTOH Newton steps are much more expensive than GD updates\\ $\Rightarrow$ How do NR and GD compare wrt runtime instead of iterations (50 steps)?} \begin{figure} - \includegraphics[width=1.0\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_runtime_comparison.pdf} \\ + \includegraphics[width=1.0\textwidth]{figure_man/simu-newton/NR_GD_runtime_comparison.pdf} \\ %\begin{footnotesize} % Irreducible error due to additive noise is $\sigma=1$ %\end{footnotesize} @@ -150,8 +150,8 @@ \vspace{-0.4cm} In case of correlated features the results are very similar: \begin{figure} - \includegraphics[width=0.8\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_log_indep_50iters_corr.pdf} \\ - \includegraphics[width=0.8\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_log_coef_50indep_corr.pdf}\\ + \includegraphics[width=0.8\textwidth]{figure_man/simu-newton/NR_GD_log_indep_50iters_corr.pdf} \\ + \includegraphics[width=0.8\textwidth]{figure_man/simu-newton/NR_GD_log_coef_50indep_corr.pdf}\\ \begin{footnotesize} Dotted lines indicate global minimizers. %Dashed line in test loss indicates irreducible error due to $\sigma=1$ @@ -166,7 +166,7 @@ \bigskip \begin{figure} - \includegraphics[width=1.0\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_runtime_comparison_corr.pdf} \\ + \includegraphics[width=1.0\textwidth]{figure_man/simu-newton/NR_GD_runtime_comparison_corr.pdf} \\ %\begin{footnotesize} % Irreducible error due to additive noise is $\sigma=1$ %\end{footnotesize}