-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path3-probability_mass_functions.Rmd
102 lines (75 loc) · 1.72 KB
/
3-probability_mass_functions.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
---
title: 'Chapter 3: Probability Mass Functions'
author: "Ghislain Nono Gueye, Ph.D."
date: "3/9/2019"
output: bookdown::pdf_document2
---
```{r setup, include=FALSE}
# Set the default chunk option so every chunk's source code is echoed in the
# knitted document.
knitr::opts_chunk$set(echo = TRUE)
```
```{r load_packages}
library(dplyr)
library(stringr)
library(purrr)
library(tidyr)
library(ggplot2)
theme_set(theme_minimal())
```
# Probability mass functions (Pmfs)
```{r}
# Empirical probability mass function of a vector (base-R version).
#
# x: a vector of observations whose distinct values can be parsed as numbers.
#
# Returns a data frame with one row per distinct value of `x`:
#   value       - the distinct value, as a number
#   probability - the share of observations equal to that value
# Missing values are ignored because table() drops NA by default.
prob_mass_fun <- function(x) {
  rel_freq <- prop.table(table(x))
  # Keep the table labels as plain strings so they convert straight to numbers.
  pmf <- as.data.frame(rel_freq, stringsAsFactors = FALSE)
  names(pmf) <- c("value", "probability")
  pmf$value <- as.numeric(pmf$value)
  pmf
}
# Empirical probability mass function of a vector (tidyverse version).
# This definition replaces any earlier `prob_mass_fun` in the session.
#
# x: a vector of observations.
#
# Returns a tibble with one row per distinct non-missing value of `x`, sorted
# by value:
#   value       - the distinct value (same type as `x`)
#   probability - the share of non-missing observations equal to that value
prob_mass_fun <- function(x) {
  tibble(value = x) %>%
    # Drop NA so the result matches what table() would report.
    filter(!is.na(value)) %>%
    count(value) %>%
    mutate(probability = n / sum(n)) %>%
    select(value, probability)
}
prob_mass_fun(c(1, 2, 2, 3, 5))
```
# The class size paradox
```{r custom_functions}
# Midpoint of each "low-high" size label.
#
# x: a vector of labels such as "5-9"; it is coerced to character first.
#
# Returns a numeric vector the same length as `x`, where each element is the
# mean of the numbers found between the "-" separators of the matching label.
compute_avg_size <- function(x) {
  pieces <- str_split(as.character(x), pattern = "-")
  map_dbl(pieces, function(bounds) mean(as.numeric(bounds)))
}
# Rescale a numeric vector so that its elements sum to one.
#
# x: a numeric vector of non-normalised weights or counts.
#
# Returns `x` divided by its total. No NA handling is applied, so a missing
# value in `x` makes every element of the result NA.
compute_prob <- function(x) {
  total <- sum(x)
  x / total
}
```
```{r}
# Class-size table: one row per size bracket.
#   size         - bracket label
#   avg_size     - midpoint of the bracket
#   count        - number of classes in the bracket
#   prob_size    - share of classes in the bracket
#   prob_dean    - bracket's term in the mean class size taken over classes
#   prob_student - bracket's term in the mean class size taken over students
# prob_dean and prob_student are probability-weighted sizes: their column sums
# are the two means reported below, so they do not sum to one.
# Columns are built in order; later ones refer to earlier ones.
dat <- tibble(
  size = c(
    "5-9", "10-14", "15-19", "20-24", "25-29",
    "30-34", "35-39", "40-44", "45-49"
  ),
  avg_size = compute_avg_size(size),
  count = c(8, 8, 14, 4, 6, 12, 8, 3, 2),
  prob_size = count / sum(count),
  prob_dean = avg_size * prob_size,
  prob_student = avg_size * prob_dean / sum(prob_dean)
)

# Show the full table.
dat

# Mean class size as computed over classes (dean) and over students.
dat %>%
  summarize(
    avg_class_size_dean = sum(prob_dean),
    avg_class_size_student = sum(prob_student)
  )
```
```{r}
# Compare the two class-size PMFs.
#   prob_dean    - the actual PMF: the share of classes of each size.
#   prob_student - the size-biased PMF: each size weighted by how many
#                  students sit in classes of that size, then renormalised.
# Both are rebuilt here from `prob_size` and `avg_size`. The `prob_dean` and
# `prob_student` columns stored in `dat` are terms of the two means, and
# renormalising them would not give these two distributions.
dat %>%
  mutate(
    prob_dean = prob_size,
    prob_student = compute_prob(avg_size * prob_size)
  ) %>%
  select(avg_size, prob_dean, prob_student) %>%
  pivot_longer(
    cols = -avg_size,
    names_to = "prob_type",
    values_to = "value"
  ) %>%
  ggplot(aes(x = avg_size, y = value, colour = prob_type)) +
  geom_step() +
  labs(x = "Class size", y = "Probability Mass Function")
```
```{r}
# UnBiasPmf
```
# DataFrame indexing
```{r}
```