% lecture14.tex
\documentclass[aspectratio=169]{beamer}
\mode<presentation>
%\usetheme{Warsaw}
%\usetheme{Goettingen}
\usetheme{Hannover}
%\useoutertheme{default}
%\useoutertheme{infolines}
\useoutertheme{sidebar}
\usecolortheme{dolphin}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{enumerate}
% some bold math symbols
\newcommand{\Cov}{\mathrm{Cov}}
\newcommand{\Cor}{\mathrm{Cor}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\brho}{\boldsymbol{\rho}}
\newcommand{\bSigma}{\boldsymbol{\Sigma}}
\newcommand{\btheta}{\boldsymbol{\theta}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\bmu}{\boldsymbol{\mu}}
\newcommand{\bW}{\mathbf{W}}
\newcommand{\one}{\mathbf{1}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bolde}{\mathbf{e}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\cpp}[1]{\texttt{#1}}
\title{Mathematical Biostatistics Boot Camp 2: Lecture 14, Multiplicity}
\author{Brian Caffo}
\date{\today}
\institute[Department of Biostatistics]{
Department of Biostatistics \\
Johns Hopkins Bloomberg School of Public Health\\
Johns Hopkins University
}
\begin{document}
\frame{\titlepage}
%\section{Table of contents}
\frame{
\frametitle{Table of contents}
\tableofcontents
}
\section{Multiplicity}
\begin{frame}\frametitle{Multiplicity}
\begin{itemize}
\item After rejecting a $\chi^2$ omnibus
test you do all pairwise comparisons
\item You conducted a study with 20 outcomes and
30 different combinations of covariates. You
consider significance at all combinations.
\item You compare diseased tissue versus normal tissue
expression levels for 20$k$ genes
\item You compare rest versus active at 300$k$ voxels
in an fMRI study
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Multiplicity}
\begin{itemize}
\item Performing two $\alpha$-level tests: \\
$H_0^1$ versus $H_a^1$ and $H_0^2$ versus $H_a^2$ \\
$E_1$: reject $H_0^1$ and $E_2$: reject $H_0^2$
\begin{eqnarray*}
FWE
& = & P(\mbox{one or more false rej} ~|~ H_0^1, H_0^2) \\
& = & P(E_1 \cup E_2 ~|~ H_0^1, H_0^2) \\
& = & P(E_1 ~|~ H_0^1, H_0^2) + P(E_2 ~|~ H_0^1, H_0^2) \\
& - & P(E_1 \cap E_2 ~|~ H_0^1, H_0^2) \\
& \leq & P(E_1 ~|~ H_0^1, H_0^2) + P(E_2 ~|~ H_0^1, H_0^2) \\
& = & 2\times \alpha
\end{eqnarray*}
\end{itemize}
Result: the \textbf{familywise error rate} for
$k$ hypotheses, each tested at level $\alpha$, is
bounded above by $k\alpha$
\end{frame}
\begin{frame}\frametitle{Proof}
$E_i$: false rejection for test $i$ \\
All probabilities are conditional on all of the nulls being true
\begin{eqnarray*}
FWE
& = & P(\mbox{one or more false rej}) \\
& = & P(\cup_{i=1}^k E_i) \\
& = & P\left\{E_1 \cup (\cup_{i=2}^k E_i)\right\} \\
& \leq & P(E_1) + P(\cup_{i=2}^k E_i)\\
& \vdots & \\
& \leq & P(E_1) + P(E_2) + \cdots + P(E_k)\\
& = & k\alpha
\end{eqnarray*}
\end{frame}
\begin{frame}\frametitle{Other direction}
\begin{itemize}
\item The $FWE$ is no larger than $k\alpha$ where $k$ is the number of tests
\item The $FWE$ is no smaller than $\alpha$
$$
P(\cup_{i=1}^k E_i) \geq P(E_1) = \alpha
$$
\item The lower bound is attained when the $E_i$ are identical,
$E_1 = E_2 = \ldots = E_k$
\item \textbf{Bonferroni's procedure} tests each individual hypothesis at level $\alpha^* = \alpha / k$
\begin{itemize}
\item The $FWE$ is no larger than $k \alpha^* = k \alpha / k = \alpha$
\item The $FWE$ is no smaller than $\alpha / k$
\end{itemize}
\end{itemize}
\end{frame}
\section{Bonferroni}
\begin{frame}\frametitle{Bonferroni's procedure}
If $\alpha^*$ is small and the tests are independent, then the
upper bound on the $FWE$ is nearly attained
\begin{eqnarray*}
FWE & = & P(\mbox{one or more false rej}) \\
& = & 1 - P(\mbox{no false rej}) \\
& = & 1 - P(\cap_{i=1}^k \bar E_i) \\
& = & 1 - (1 - \alpha^*)^k \\
& \approx & 1 - (1 - k\alpha^*)\\
& = & k \alpha^* = \alpha
\end{eqnarray*}
\end{frame}
\begin{frame}\frametitle{Scratch work}
Recall the approximation for $\alpha^*$ near 0
$$
\frac{f(\alpha^*) - f(0)}{\alpha^* - 0} \approx f'(0)
$$
hence
$$
f(\alpha^*) \approx f(0) + \alpha^* f'(0)
$$ \\
In our case $f(\alpha^*) = (1 - \alpha^*)^k$ so $f(0) = 1$ \ \\ \ \\
$f'(\alpha^*) = -k(1 - \alpha^*)^{k-1}$ so $f'(0) = -k$
\ \\ \ \\
Therefore $(1 - \alpha^*)^k \approx 1 - k \alpha^*$
\end{frame}
\begin{frame}\frametitle{Notes}
\begin{itemize}
\item For Bonferroni's procedure $\alpha^* = \alpha / k$, so $\alpha^*$ will be close to 0 for
a large number of tests
\item When there are lots of tests that are (close to) independent,
the upper bound on the $FWE$ used is appropriate
\item When the tests are closely related, then the $FWE$ will be closer to the lower
bound, and Bonferroni's procedure is conservative
\item Is the familywise error rate always the most appropriate quantity to control for?
\end{itemize}
\end{frame}
\section{FDR}
\begin{frame}\frametitle{FDR}
\begin{itemize}
\item The \textbf{false discovery rate} is the expected proportion of the tests
declared significant that are false rejections (true nulls)
\item Controlling the FDR is less conservative than controlling the FWE rate
\item Introduced by Benjamini and Hochberg
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Benjamini and Hochberg procedure}
\begin{enumerate}
\item Order your $k$ p-values, say $p_1 \leq p_2 \leq \ldots \leq p_k$
\item Define $q_i = k p_i / i$
\item Define $F_i = \min(q_i,\ldots,q_k)$
\item Reject for all $i$ so that $F_i$ is less than the desired FDR
\end{enumerate}
Note that the $F_i$ are nondecreasing in $i$, so you only need to find the largest
$i$ such that $F_i < FDR$ and reject hypotheses $1,\ldots,i$
\end{frame}
\begin{frame}\frametitle{Example}
1st 10 of 50 SNPs (Rosner page 581)\\
\begin{center}
\ttfamily
\begin{tabular}{lllll}
Gene & $i$ & $p_i$ & $q_i=kp_i/i$ & $F_i$ \\ \hline
30 & 1 & $<$.0001 & .0035 & .0035 \\
20 & 2 & .011 & .28 & .16 \\
48 & 3 & .017 & .28 & .16 \\
50 & 4 & .017 & .22 & .16 \\
4 & 5 & .018 & .18 & .16 \\
40 & 6 & .019 & .16 & .16 \\
7 & 7 & .026 & .18 & .18 \\
14 & 8 & .034 & .21 & .21 \\
26 & 9 & .042 & .23 & .23 \\
47 & 10& .048 & .24 & .24 \\ \hline
\end{tabular}
\normalfont
\end{center}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item Bonferroni cutoff $.05 / 50 = .001$; only the first gene is significant
\item For an FDR of 0--15\%, only the first gene would be declared significant
\item For an FDR of 16--20\%, the first $7$ would be significant
\end{itemize}
\end{frame}
\end{document}