% 04-01-ACA-Inference-Classification.tex

% move all configuration stuff into includes file so we can focus on the content
\input{include}


\subtitle{module 4.1: classification}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
    % generate title page
	\input{include/titlepage}

    \section[overview]{lecture overview}
        \begin{frame}{introduction}{overview}
            \begin{block}{corresponding textbook section}
                    %\href{http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6331125}{Chapter 8: Musical Genre, Similarity, and Mood} (pp.~155)
                    section~4.1
            \end{block}

            \begin{itemize}
                \item   \textbf{lecture content}
                    \begin{itemize}
                        \item   intuitive intro to machine learning
                        \item   classifier examples
                    \end{itemize}
                \bigskip
                \item<2->   \textbf{learning objectives}
                    \begin{itemize}
                        \item   describe the basic principles of data-driven machine learning approaches
                        \item   implement a kNN classifier in Python
                    \end{itemize}
            \end{itemize}
            \inserticon{directions}
        \end{frame}

    \section[intro]{introduction}
        \begin{frame}{classification}{introduction}
            remember the flow chart of a general ACA system:
            \vspace{-3mm}
            \begin{figure}
                \input{pict/introduction_ACASystem_3}
            \end{figure}
            \begin{itemize}
                \item<2->   \textcolor<3->{highlight}{\textit{classification}}:
                    \begin{itemize}
                        \item   assign class labels to data
                    \end{itemize}
                \item<2->   \textit{regression}:
                    \begin{itemize}
                        \item	estimate numerical labels for data
                    \end{itemize}
                \item<2->   \textit{clustering}:
                    \begin{itemize}
                        \item	find grouping patterns in data
                    \end{itemize}
            \end{itemize}
        \end{frame}
        \begin{frame}{classification}{basic example}
              %\vspace{-3mm}
             \begin{columns}
                \column{.4\linewidth} 
                    hypothetical system:
                    \begin{itemize}
                        \item   one feature (envelope)
                        \item   predefined threshold
                            \begin{itemize}
                                \item   higher than threshold\\ $\Rightarrow$ class 1 (onset)
                                \item   lower than threshold\\ $\Rightarrow$ class 0 (no onset)
                            \end{itemize}
                    \end{itemize}
                \column{.6\linewidth} 
                \vspace{-10mm}
                            \figwithmatlab{ThresholdClassification}
            \end{columns}
        \end{frame}
        
        \begin{frame}{classification}{data-driven}
             \vspace{-3mm}
             \begin{columns}
                \column{.4\linewidth} 
                   \begin{itemize}
                        \item   derive classification parameters from data, e.g.,
                        \smallskip
                        \item[$\Rightarrow$]  learn common feature distributions per class
                        \item[$\Rightarrow$]  learn separation metrics per class
                    \end{itemize}
                \column{.6\linewidth} 
                %\vspace{-10mm}
                    \figwithmatlab{FeatureSpace}
            \end{columns}
        \end{frame}
        
        %\begin{frame}{classification}{general steps}
            %\begin{enumerate}
                %\item	\textbf{define training set}: annotated results
                %\smallskip
                %\item<2->	\textbf{normalize} training set
                %\smallskip
                %\item<3->	\textbf{train} classifier
                %\smallskip
                %\item<4->	\textbf{evaluate} classifier with test (or validation) set
                %\smallskip
                %\item<5->	(\textbf{adjust} classifier settings, return to 4.)
            %\end{enumerate}
        %\end{frame}
        %
    %\section{classification}
        %\begin{frame}{classification}{rules of thumb}
            %\vspace{-3mm}
            %\begin{itemize}
                %\item   \textbf{training set}
                    %\begin{itemize}
                        %\item	training set size vs.\ number of features
                            %\begin{itemize}
                                %\item	training set too small,% $\Rightarrow$ \textit{overfitting}
                                %feature number too large $\Rightarrow$ \textit{overfitting}
                            %\end{itemize}
                        %\item<1->	training set \textbf{too noisy} $\Rightarrow$ \textit{underfitting}
                        %\item<1->	training set \textbf{not representative} $\Rightarrow$ \textit{bad classification performance}
                    %\end{itemize}
                %\smallskip
                %\item<2->   \textbf{classifier}
                    %\begin{itemize}
                        %\item<2->   classifier too complex $\Rightarrow$ \textit{overfitting}
                        %\item<2->	\textbf{poor classifier} $\Rightarrow$ \textit{bad classification performance}
                            %\begin{itemize}
                                %\item[$\rightarrow$]	different classifier
                            %\end{itemize}
                    %\end{itemize}
                %\smallskip
                %\item<3->   \textbf{features}
                    %\begin{itemize}
                        %\item<3->	\textbf{poor features} $\Rightarrow$ \textit{bad classification performance}
                            %\begin{itemize}
                                %\item[$\rightarrow$]	new, better features
                            %\end{itemize}
                        %\item<3->	features \textbf{not normalized} $\Rightarrow$ possibly \textit{bad classification performance}
                            %\begin{itemize}
                                %\item	feature distribution (range, mean, symmetry)
                            %\end{itemize}
                    %\end{itemize}
            %\end{itemize}
        %\end{frame}
        %\begin{frame}{classification}{evaluation}
            %\begin{itemize}
                %\item	define \textbf{test set} for evaluation
                    %\begin{itemize}
                        %\item	test set \textit{different} from training set
                        %\item	otherwise, same requirements
                    %\end{itemize}
                %
                %\bigskip
                %\item<2->	example: \textbf{$N$-fold cross validation}
                    %\begin{enumerate}
                        %\item<2->	split training set into $N$ parts (randomly, but preferably identical number per class)
                        %\item<3->	select one part as test set
                        %\item<4->	train the classifier with all observations from remaining $N-1$ parts
                        %\item<5->	compute the classification rate for the test set
                        %\item<6->	repeat until all $N$ parts have been tested
                        %\item<7->	overall result: \textit{average} classification rate
                    %\end{enumerate}
            %\end{itemize}
        %\end{frame}
        %
     \section[examples]{classifier examples}
        \begin{frame}{classifier examples}{k-Nearest Neighbor (kNN)}
            \vspace{-5mm}
            \begin{columns}
                \column{.6\linewidth} 
                    \begin{itemize}
                        \item	\textbf{training}: extract reference vectors from training set 
                            \begin{itemize}
                                \item store coordinates and class labels
                            \end{itemize}
                        \bigskip
                        \item<2->	\textbf{classification}: extract query vector and set class to majority of $k$ nearest reference vectors
                            \begin{enumerate}
                                \item   compute distance between query and all training vectors
                                \item   sort distances to find closest vectors
                                \item   choose majority class out of the $k$ closest vectors
                            \end{enumerate}
                        \bigskip
                        \item<3->	\textbf{classifier model}: all training vectors
                    \end{itemize}
                \column{.4\linewidth} 
                    %\only<1,6>{\begin{figure}\includegraphics[width=\columnwidth]{Knn-0}\end{figure}}
                    %\setcounter{i}{1}
                    %\setcounter{j}{2}
                    %\whiledo{\value{i}<5}
                    %{
                        %\only<\value{j}>{\begin{figure}\includegraphics[width=\columnwidth]{Knn-\arabic{i}}\end{figure}}
                        %\stepcounter{i}
                        %\stepcounter{j}
                    %}
                    \figwithmatlab{Knn}
                    \only<4>{$k = 3 \Rightarrow$ blue majority}
                    \only<5>{$k = 5 \Rightarrow$ black majority}
                    \only<6>{$k = 7 \Rightarrow$ black majority}
            \end{columns}
        \end{frame}
        
        \begin{frame}{classifier examples}{Gaussian Mixture Model (GMM)}
            \vspace{-5mm}
             \begin{columns}
                \column{.4\linewidth} 
                   \begin{itemize}
                        \item	\textbf{training}:\\ model each class distribution as superposition of Gaussian distributions
                        \bigskip
                        \item<2->	\textbf{classification}:\\ compute output of each Gaussian and select class with highest probability
                        \bigskip
                        \item<3->	\textbf{classifier data}:\\ per class per Gaussian: $\mu$ and covariance, mixture weight
                    \end{itemize}
                \column{.6\linewidth} 
                \vspace{-11mm}
                    \figwithmatlab{Gmm}
            \end{columns}
        \end{frame}
        
        \begin{frame}{classifier examples}{Support Vector Machine (SVM)}
            \begin{itemize}
                \item	\textbf{training}:
                    \begin{itemize}
                        \item   map features to high dimensional space
                            \figwithref{graph/SVM}{\href{https://en.wikipedia.org/wiki/Support\_vector\_machine}{https://en.wikipedia.org/wiki/Support\_vector\_machine}}
                        \item   find separating hyperplane through maximum distance of support vectors (data points)
                    \end{itemize}
                \item<2->	\textbf{classification}: apply feature transform and proceed with `linear' classification
                \item<3->	\textbf{classifier data}: support vectors, kernel, kernel parameters
            \end{itemize}
        \end{frame}
    
    \section{summary}
        \begin{frame}{summary}{lecture content}
            \begin{itemize}
                \item   \textbf{data-driven approach}
                    \begin{itemize}
                        \item   `general' system learns parameters/behavior from data
                        \item   human interaction through
                            \begin{itemize}
                                \item   parametrization and procedures
                                \item   data selection
                            \end{itemize}
                    \end{itemize}
                \bigskip
                \item   \textbf{many classifiers with different levels of complexity}
                    \begin{enumerate}
                        \item   kNN
                        \item   GMM
                        \item   SVM
                        \item   RandomForest
                        \item   DNN
                        \item   \ldots
                    \end{enumerate}
                %\bigskip
                %\item   \textbf{fine balance of inputs}
                    %\begin{enumerate}
                        %\item   number of features
                        %\item   classifier complexity
                        %\item   amount and variability of data
                    %\end{enumerate}
            \end{itemize}
            \inserticon{summary}
        \end{frame}
\end{document}