-
Notifications
You must be signed in to change notification settings - Fork 0
/
notes.lt
171 lines (138 loc) · 5.73 KB
/
notes.lt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
@Include {paper-format}
@Doc @Text @Begin
|1rt 1.3 @Scale {Stat 231 Notes}
@SB |1rt {Andrei Thorp}
@SB |1rt {Spring 2011}
@BB
@BB Population @Defn {
Target @Defn {theoretical best case population}
@SB Study @Defn {actual population used for study}
@SB Sample @Defn {subset of the study population, used for sampling}
@SB Unit @Defn {individual atom of the population}
}
@BB PPDAC @Defn {Problem, plan, data, analysis, conclusion}
@BB {PPDAC -- Data} @Defn {Can be discrete (individualized), continuous (like real
numbers), categorical (not numerically valuable, like names), binary (categorical
for two categories).}
@BB {Data Transforms} @Defn {
Monotone @Defn {after the data is transformed, the original monotonic order
is preserved.}
@SB {Monotone Decreasing} @Defn {like monotone, but order is reversed after
the transform}
@SB {Affine} @Defn {in linear form @M{y = Ax + B}}
@SB {Log} @Defn {transform of @M{x --> log(x)}, where log is ln}
}
@BB Variate @Defn {characteristics of units}
@BB Attributes @Defn {functions on units}
@BB {Problem Types} @Defn {
Descriptive @Defn {involves learning about some attribute}
@SB Causative @Defn {finding a causal link between variates. One variate is
`explanatory' and the other is `response'}
@SB Predictive @Defn {predict value of a response variate for a given unit}
}
@BB Mean @Defn {@M{alpha(cdot)}, the attribute defining the average of the
population}
@BB Error @Defn {through the study, error may occur:
@SB {study error} @Defn @M{alpha(P sub {study}) - alpha(P sub {target})}
@SB {sample error} @Defn @M{alpha(S) - alpha(P sub {study})}
}
@BB {PPDAC -- Plan} @Defn {
Experimental @Defn {You can manipulate a study's variables.}
@SB Observational @Defn {Studies which are observational only.}
}
@BB {Kernel Density Estimate} @Defn { a smoothing algorithm for datasets}
@BB Histogram @Defn {Continuous data is split into bins, and these bins are then
plotted as a bar graph.}
@BB {Symmetry"/"Skew} @Defn {Data plots may have a `tail' that goes left or right,
and this is the direction of the skew. If there is no skew, then the data is
symmetric}
@BB {Frequency Polygon} @Defn {Join the top centres of each histogram bar with
lines to get the polygon}
@BB {Cumulative Frequency Plot} @Defn {Like a histogram, but the height of each
bar is the count of its own bin plus the counts of all preceding bins}
@BB {Empirical Rule} @Defn {For symmetric, unimodal distributions, if you know
@M{mu} and @M{sigma}, then:
@BulletList
@LI {Approx @M{68%} of observations lie in @M{mu +- sigma}}
@LI {Approx @M{95%} of observations lie in @M{mu +- 2 sigma}}
@LI {Approx @M{99.73%} of observations lie in @M{mu +- 3 sigma}}
@EndList
}
@BB {Event Independence} @Defn @M{P(A "|" B) = P(A "|" not B) = P(A)}
@BB {Correlation} @Defn {@Math{r = {S sub XY} over sqrt {S sub XX S sub YY}}
where @SB @Math{S sub XX = sum from {i=1} to {n} (x sub i - x overbar) sup 2} and
@Math {S sub XY = sum from {i=1} to {n} ((x sub i - x overbar)(y sub i - y overbar))}
@SB The closer @M{r} is to @M{+- 1}, the stronger the linear relationship.}
@BB {Time Series Plot} @Defn {A scatter plot that tracks against time: @M{(t, y(t)).}}
@BB grey @Colour @Box {Midterm 1 Notes} @Defn {@RawBulletList
@LI {Remember some probability stuff like union, conditional, independence}
@LI {Very little calculation}
@LI {Memorize some of the distributions so far}
@LI {Write"/"memorize distributions' likelihood formulae}
@EndList}
@BB {MLE of Poisson Distribution} @Defn {A random variable of the Poisson distribution
is @Math{P(Y=y)=f sub y (y) = {e sup {-lambda} lambda sup y} over y!}, for
@M{y = 0, 1, 2, ...}
@SB So @Math{L(y sub 1, y sub 2, ..., y sub n; lambda) =
{e sup {-lambda} lambda sup {y sub 1}} over {y sub 1 !} cdot
{e sup {-lambda} lambda sup {y sub 2}} over {y sub 2 !} cdot ... cdot
{e sup {-lambda} lambda sup {y sub n}} over {y sub n !} =
{e sup {-n lambda} lambda sup {sum from {i=1} to {n} y sub i}} over
{prod from {i=1} to {n} y sub i !}
}
@SB Now, let's take the ln of everything to make it simpler.
@SB @Math{l(lambda) = -n lambda
+ln(lambda) sum from {i=1} to {n} y sub i - sum from {i=1} to {n} ln(y sub i !)}.
@SB Take the derivative of this, set it to zero, and solve to maximize... @M{lambda hat = y overbar}
}
@BB {Quick Review of Partial Derivatives} @Defn {Consider the function @Math{f(x, y) =
3x sup 2 + 4xy sup 2 + 2y sup 2}. Find the partial derivatives with respect to:
@BulletList
@LI {x: @Math{{partial f(x,y)} over {partial x} = 6x + 4y sup 2}}
@LI {y: @Math{{partial f(x,y)} over {partial y} = 8xy + 4y}}
@EndList
Pretty much: just ignore the other variable, treat it as a constant.}
@BB {Simple Linear Regression MLEs} @Defn {
@Math{alpha hat = y overbar - beta hat x overbar}
@SB @Math{beta hat = {S sub {XY}} over {S sub XX}}
@SB @Math{sigma hat sup 2 = 1 over n sum from {i=1} to {n} (y sub i -
alpha hat - beta hat x sub i ) sup 2 = 1 over n (S sub {YY} - beta hat S sub XY )}
}
@BB {Practice Problem for MLEs"/"Regression} @Defn {Consider the following dataset:
@SB { @OneRow {{@Underline x
//0.5f 100
//0.5f 200
//0.5f 300
//0.5f 400
//0.5f 500
//0.5f 600
//0.5f 700
} |2f
{@Underline y
//0.5f 40
//0.5f 50
//0.5f 50
//0.5f 70
//0.5f 65
//0.5f 65
//0.5f 80
}} |3f
@OneRow {x = lbs of fertilizer per acre
@SB y = wheat yield (bushels per acre)
@SB Determine: |1f {
@M{y hat = alpha hat + beta hat x}
/0.5f @M{sigma hat sup 2}
/0.5f @M{rho hat = r = {S sub XY} over {sqrt{S sub XX S sub YY}}}
/0.5f @M{r sup 2}
}
}
}
@SB Check using R -- make the two vectors, then use `summary(lm(y "~" x))'
}
@BB blue @Colour {Important Formulae} @Defn {
@Math{sum from {i=1} to {n} i = {n(n+1)} over 2}
@SB @Math{sum from {i=1} to {n} i sup 2 = {(n(n+1))} over 2 cdot {2n+1} over 3}
@SB {Event Conditional} @Defn @Math{P(A bar B) = {P(A intersection B)} over P(B)}
@SB {Event Independence} @Defn @M{P(A "|" B) = P(A "|" not B) = P(A)}
}
@End @Text