% 01-00-ACA-Introduction-2-MIR.tex
% move all configuration stuff into include file so we can focus on the content
\input{include}
\subtitle{module 1.0: introduction to MIR/ACA}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
% generate title page
\input{include/titlepage}
\section[overview]{lecture overview}
\begin{frame}{introduction}{overview}
\begin{block}{corresponding textbook section}
%\href{http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=&arnumber=6331118&}{Chapter 1~---~Introduction}: pp.~1--6
chapter~1
\end{block}
\vspace{5mm}
\begin{itemize}
\item \textbf{lecture content}
\begin{itemize}
\item audio content analysis
\item typical applications
%\item audio content
%\item processing steps in a typical ACA system
\end{itemize}
\bigskip
\item<2-> \textbf{learning objectives}
\begin{itemize}
\item list goals and applications in ACA
\item understand the general development of the field
\item differentiate various fields related to ACA
%\item discuss typical forms of content in an audio signal
%\item describe the typical signal flow in an ACA system
\end{itemize}
\end{itemize}
\inserticon{directions}
\end{frame}
\section[intro]{introduction}
\begin{frame}{introduction}{content in audio signals}
\begin{columns}
\column{.4\textwidth}
examples for audio signal content
\begin{itemize}
\item \textbf{speech}
\begin{itemize}
\item text information
\item speaker
\item recording environment
\item \dots
\end{itemize}
\item \textbf{music}
\begin{itemize}
\item melody
\item harmony
\item structure
\item instruments
\item mood
\item genre
\item \dots
\end{itemize}
\end{itemize}
\column{.6\textwidth}
\figwithmatlab{Waveform}
%\begin{figure}
%\includegraphics[scale=.4]{waveform}
%\end{figure}
\end{columns}
\end{frame}
\section[aca]{audio content analysis}
\begin{frame}{introduction}{audio content analysis --- goals}
\begin{block}{Audio Content Analysis}
The field of Audio Content Analysis (ACA) aims at designing and applying algorithms for the \textbf{automatic extraction of content information from the raw (digital) audio signal}.
This enables content-driven and content-adaptive services which describe, categorize, sort, retrieve, segment, process, and visualize the signal and its content.
\end{block}
\end{frame}
\begin{frame}{introduction}{audio content analysis --- research fields}
\vspace{-3mm}
\begin{itemize}
\item \textbf{speech} analysis
\begin{itemize}
\item speech recognition
\item speech emotion recognition
\item \ldots
\end{itemize}
\smallskip
\item<2-> \textbf{urban sound} analysis
\begin{itemize}
\item noise pollution monitoring
\item audio surveillance
\item \ldots
\end{itemize}
\smallskip
\item<3-> \textbf{industrial sound} analysis
\begin{itemize}
\item monitoring the state of mechanical devices (engines, etc.)
\item monitoring the health of livestock
\item \ldots
\end{itemize}
\smallskip
\item<4-> \only<5->{\textcolor{highlight}}{\textbf{musical audio} analysis}
\begin{itemize}
\item music transcription
\item music classification
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{introduction}{musical audio vs.\ other audio}
\vspace{-3mm}
\textbf{music} \ldots
\begin{itemize}
\item is a \textbf{wide band} signal\\ unlike many other audio signals
\item<2-> comprises both \textbf{tonal and noise} components\\ like most audio signals
\item<3-> combines \textbf{multiple sound sources}\\ unlike speech, like urban sound
\item<4-> is a \textbf{poly-timbral} mixture\\ unlike industrial sound
\item<5-> sources are \textbf{harmonically related and synchronous}\\ unlike other multi-source signals
\item<6-> has a highly structured language that is \textbf{abstract}\\ unlike speech
\end{itemize}
\end{frame}
\begin{frame}{introduction}{audio content analysis --- related terms and areas}
\begin{itemize}
\item \textbf{terminology}
\bigskip
\begin{itemize}
\item \textit{Music Informatics}
\begin{itemize}
\item overarching term for nearly everything with music and computers
\end{itemize}
\smallskip
\item \textit{Music Information Retrieval (MIR)}:
\begin{itemize}
\item analysis and retrieval of music data
\item includes audio, symbolic, and other data
\item might also cover other tasks (source separation, generation)
\end{itemize}
\smallskip
\item \textit{Machine Listening} \& \textit{Computer Audition}
\begin{itemize}
\item focus on the recognition and understanding of music
\end{itemize}
\smallskip
\item \textit{Computational Auditory Scene Analysis (CASA)}
\begin{itemize}
\item focus on human perception \& cognition, understanding of the auditory scene
\end{itemize}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{introduction}{audio content analysis --- research field}
\vspace{-7mm}
\begin{columns}
\column{.8\textwidth}
\begin{itemize}
\item<1-> \textbf{interdisciplinary}
\begin{itemize}
\item digital signal processing
\item machine learning / data mining
\item musicology
\item music psychology
\item \ldots
\end{itemize}
\smallskip
\item<2-> ISMIR \textbf{community}
\begin{itemize}
\item annual conferences
\item conference papers \& Transactions
\item ISMIR-Community mailing list
\item MIREX: MIR Evaluation eXchange
\end{itemize}
\smallskip
\item<3-> \textbf{related publication outlets}
\begin{itemize}
\item \textit{conferences}: ISMIR, ICASSP, ICME, SMC, DAFx, ACM~MM, \ldots
\item \textit{journals}: TISMIR, TASLP, Computer Music, JNMR, JAES, \ldots
\end{itemize}
\end{itemize}
\column{.2\textwidth}
\vspace{25mm}
\begin{figure}
\includegraphics[height=.8cm,keepaspectratio]{graph/logo_ismir}
\end{figure}
\end{columns}
\addreference{\href{http://www.ismir.net}{www.ismir.net}}
\end{frame}
\begin{frame}{introduction}{audio content analysis --- history}
\begin{columns}
\column{.5\textwidth}
\begin{itemize}
\item<1-> \textbf{historic}
\begin{itemize}
\item mechanical devices
\end{itemize}
\smallskip
\item<2-> \textbf{expert systems}
\begin{itemize}
\item rule-based approaches
\end{itemize}
\smallskip
\item<3-> data-driven \textbf{traditional ML systems}
\begin{itemize}
\item feature design plus ML engine
\end{itemize}
\smallskip
\item<4-> \textbf{deep neural networks}
\begin{itemize}
\item role of expert knowledge diminishes
\end{itemize}
\end{itemize}
\column{.5\textwidth}
\begin{figure}
\includegraphics[width=\columnwidth]{graph/tonoscope}
\end{figure}
\end{columns}
\end{frame}
\section[apps]{applications}
\begin{frame}{introduction}{applications}
\begin{itemize}
\item \textbf{music browsing and music discovery}
\begin{itemize}
\item search \& retrieval, similarity, interfaces (e.g., QBH)
\end{itemize}
\smallskip
\item<2-> \textbf{music consumption}
\begin{itemize}
\item creative music listening
\end{itemize}
\smallskip
\item<3-> \textbf{music production}
\begin{itemize}
\item adaptive parametrization, enhancements of creative process
\end{itemize}
\smallskip
\item<4-> \textbf{music education}
\begin{itemize}
\item musically intelligent software tutoring
\end{itemize}
\smallskip
\item<5-> \textbf{generative music}
\begin{itemize}
\item interactive soundtracks (games, video)
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{introduction}{(commercial) application examples}
\setbeamercovered{invisible} % uncover the graphics with the bullet points
\begin{itemize}
\item \textbf{recommendation}, playlist generation
\begin{columns}
\column{.25\textwidth}
\column{.25\textwidth}
\href{https://www.spotify.com}{\includegraphics[scale=.1]{graph/logo_spotify}}
\column{.25\textwidth}
\href{https://www.last.fm}{\includegraphics[scale=.05]{graph/logo_lastfm}}
\column{.25\textwidth}
\href{https://www.pandora.com}{\includegraphics[scale=.03]{graph/logo_pandora}}
\end{columns}
\bigskip
\item<1-> \textbf{fingerprinting}
\begin{columns}
\column{.25\textwidth}
\column{.25\textwidth}
\href{https://www.shazam.com}{\includegraphics[scale=.03]{graph/logo_shazam}}
\column{.25\textwidth}
\href{https://www.gracenote.com}{\includegraphics[scale=.05]{graph/logo_gracenote}}
\column{.25\textwidth}
\end{columns}
\bigskip
\item<1-> \textbf{score following}
\begin{columns}
\column{.25\textwidth}
\column{.25\textwidth}
\href{http://www.rockprodigy.com}{\includegraphics[scale=.15]{graph/logo_rockprodigy}}
\column{.25\textwidth}
\href{https://www.smartmusic.com}{\includegraphics[scale=.75]{graph/logo_smartmusic}}
\column{.25\textwidth}
\end{columns}
\bigskip
\item<1-> (multi-)\textbf{pitch detection}
\begin{columns}
\column{.25\textwidth}
\column{.25\textwidth}
\href{http://www.celemony.com}{\includegraphics[scale=.15]{graph/logo_melodyne}}
\column{.25\textwidth}
\href{http://www.zplane.de}{\includegraphics[scale=.125]{graph/logo_zplane}}
\column{.25\textwidth}
\end{columns}
\end{itemize}
\end{frame}
%\section[content]{audio content}
%\begin{frame}{audio content}{sources}
%\setbeamercovered{invisible}
%\question{what are the sources of (musical) audio content?}
%
%\begin{enumerate}
%\item<2-> \textbf{score}:
%\begin{itemize}
%\item definition of musical ideas
%\item ``blue-print'' of the music
%\item \textit{examples}: melody, key, harmony, rhythmic patterns, \ldots
%\end{itemize}
%\item<3-> \textbf{performance}:
%\begin{itemize}
%\item unique acoustic rendition
%\item information in the score is interpreted, modified, added to
%\item \textit{examples}: (micro-)tempo, dynamics, intonation, \ldots
%\end{itemize}
%\item<4-> \textbf{production}:
%\begin{itemize}
%\item aesthetic choices
%\item editing \& processing
%\item \textit{examples}: sound quality (EQ, microphone positioning), changes in timing and pitch
%\end{itemize}
%\end{enumerate}
%\end{frame}
%\begin{frame}\frametitle{audio content}\framesubtitle{technical categories}
%audio content can be structured into \textbf{5 technical basic categories:}
%
%\bigskip
%\begin{enumerate}
%\item<2-> \textbf{timbral}: related to sound quality
%\begin{itemize}
%\item \textit{examples}: instrument(ation), playing technique, venue, audio processing, \ldots
%\end{itemize}
%\smallskip
%\item<3-> \textbf{intensity-related}: related to musical dynamics
%\begin{itemize}
%\item \textit{examples}: accents, loudness, \ldots
%\end{itemize}
%\smallskip
%\item<4-> \textbf{tonal}: related to pitch
%\begin{itemize}
%\item \textit{examples}: melody, chords, intonation, vibrato, \ldots
%\end{itemize}
%\smallskip
%\item<5-> \textbf{temporal}: related to rhythm and tempo
%\begin{itemize}
%\item \textit{examples}: timing, meter, rhythmic patterns, \ldots
%\end{itemize}
%\smallskip
%\item<6-> \textbf{statistical \& technical}: related to signal properties
%\begin{itemize}
%\item \textit{examples}: amplitude distribution, number of zero crossings, \ldots
%\end{itemize}
%\end{enumerate}
%\end{frame}
%
%\section[ACA]{generic audio content analysis system}
%\begin{frame}\frametitle{audio content analysis}\framesubtitle{system overview}
%\begin{textblock*}{100mm}(1cm,2cm)
%\includegraphics[scale=.2]{waveform}
%\end{textblock*}
%\begin{figure}
%\centering
%\only<1>{\input{pict/introduction_ACASystem}}
%\only<2>{\input{pict/introduction_ACASystem_2}}
%\only<3->{\input{pict/introduction_ACASystem_3}}
%\end{figure}
%
%\begin{columns}
%\column{.5\textwidth}
%\begin{itemize}
%\item<2->[] \textbf{feature extraction}
%\begin{itemize}
%\item dimensionality reduction
%\item meaningful representation
%\end{itemize}
%\end{itemize}
%\column{.5\textwidth}
%\begin{itemize}
%\item<3->[] \textbf{classification}
%\begin{itemize}
%\item map or convert feature to comprehensible domain
%\end{itemize}
%\end{itemize}
%\end{columns}
%\end{frame}
\section{summary}
\begin{frame}{summary}{lecture content}
\begin{itemize}
\item \textbf{audio content analysis}
\begin{itemize}
\item aims at extracting data about the (musical) content from the music signal
\item interdisciplinary field
\end{itemize}
\bigskip
\item \textbf{related areas}
\begin{itemize}
\item music information retrieval, machine listening, \ldots
\end{itemize}
\bigskip
\item \textbf{applications} cover music \ldots
\begin{itemize}
\item discovery, consumption, production, education, generation
\end{itemize}
%\item \textbf{audio content}
%\begin{itemize}
%\item can relate to timbre, pitch, intensity, tempo and rhythm (but there is both lower level and higher level content)
%\item is shaped by the musical ideas (score), the music performance, and the (studio) production
%\end{itemize}
%\bigskip
%\item the \textbf{flow chart of an ACA system} at its most fundamental level shows
%\begin{itemize}
%\item a feature extraction step to extract meaningful descriptors
%\item a classification or inference step to produce a ``human'' result
%\end{itemize}
\end{itemize}
\inserticon{summary}
\end{frame}
\end{document}