-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathproject2.jl
3828 lines (3148 loc) · 132 KB
/
project2.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
### A Pluto.jl notebook ###
# v0.20.4
using Markdown
using InteractiveUtils
# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
macro bind(def, element)
#! format: off
quote
local iv = try Base.loaded_modules[Base.PkgId(Base.UUID("6e696c72-6542-2067-7265-42206c756150"), "AbstractPlutoDingetjes")].Bonds.initial_value catch; b -> missing; end
local el = $(esc(element))
global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : iv(el)
el
end
#! format: on
end
# ╔═╡ 14964632-98d8-4a2f-b2f6-e3f28b558803
# ╠═╡ show_logs = false
using StanfordAA228V
# ╔═╡ 173388ab-207a-42a6-b364-b2c1cb335f6b
begin
import MarkdownLiteral: @mdx
using ProgressLogging
using Downloads
using TOML
using Test
using Base64
using PlutoUI
using Distributions
using Random
using Plots
using ReverseDiff
using Optim
using Parameters
using BSON
using GridInterpolations
using LinearAlgebra
default(fontfamily="Computer Modern", framestyle=:box) # LaTeX-style plotting
md"> _Additional package management._"
end
# ╔═╡ 117d0059-ce1a-497e-8667-a0c2ef20c632
md"""
# Project 2: Estimating failure probability
_Please wait until the entire notebook is finished loading before proceeding (you may get temporary errors)._
"""
# ╔═╡ d7643abe-4619-4859-b2e3-9e932fe53b2f
highlight(md"""_See the three **"⟶ Task"** sections below for where to fill out the algorithms._""")
# ╔═╡ 78181077-5548-459d-970d-1d8a9d63b72c
# ╔═╡ da5b4000-0bce-4fc2-be85-dada21264ca3
textbook_details([
"Chapter 6. _Failure Distribution_",
"Chapter 7. _Failure Probability Estimation_"])
# ╔═╡ 0456a732-2672-4108-a241-db9ae879a913
# ╔═╡ 6e8ab7c9-fb49-4d89-946d-c7d7588c199a
md"""
## Julia/Pluto tips
Useful tips you may be interested in regarding Julia and Pluto.
"""
# ╔═╡ fe044059-9102-4e7f-9888-d9f03eec69ff
html_expand("Expand for general Julia/Pluto tips.", [
html"<h2hide>Julia packages</h2hide>",
md"""
Feel free to use as many external Julia packages as you like. Running `using PackageName` will automatically install it in this notebook.
[List of Julia packages.](https://juliapackages.com/)""",
html"<h2hide>Dependent cells</h2hide>",
md"""
Unlike Jupyter notebooks, Pluto has _dependent cells_. Meaning, if one cell uses the variable `x` and another cell defines `x`, then the first cell will _re-run_ when `x` is changed. This means there is no "hidden global state".
See the [Pluto README](https://github.com/fonsp/Pluto.jl?tab=readme-ov-file) for details/examples.""",
html"<h2hide>New cells</h2hide>",
md"""
You can create as many new cells anywhere as you like. Click the `+` icon on the right or hit `CTRL+Enter` within an existing cell to create a new one below it.
- **Important**: Please do not modify/delete any existing cells.""",
html"<h2hide>Running code</h2hide>",
md"""
After editing a cell, you can run it several ways:
1. To run the current cell: `<SHIFT+ENTER>`
1. To run the current cell and create a new one below: `<CTRL+ENTER>` or `<CMD+ENTER>`
1. To run all cells with unsaved changes: `<CTRL+S>` or `<CMD+S>`
""",
html"<h2hide>Multiple lines of code in a cell</h2hide>",
md"""
To put multple lines of code in a single cell in Pluto, wrap with `begin` and `end`:
```julia
begin
x = 10
y = x^2
end
```""",
html"<h2hide>Locally scoped cells</h2hide>",
md"""
Use Julia's `let` block to create locally scoped variables:
```julia
ℓ = let d = get_depth(sys)
p = NominalTrajectoryDistribution(sys, d)
τ = rollout(sys, d)
logpdf(p, τ)
end
```
The last line of code in the `let` block will be returned and assigned to the globally scoped `ℓ` variable in this case.
This way, you can reuse variable names such as `τ` without affecting other cells that may also use that name in global scope.
You could also just define a new function:
```julia
function my_test(sys)
d = get_depth(sys)
p = NominalTrajectoryDistribution(sys, d)
τ = rollout(sys, d)
return logpdf(p, τ)
end
ℓ = my_test(sys)
```
""",
html"<h2hide>Suppress cell output</h2hide>",
md"""
To suppress the Pluto output of a cell, add a semicolon `;` at the end.
```julia
x = 10;
```
or
```julia
begin
x = 10
y = x^2
end;
```
""",
html"<h2hide>Underscore as digit separator</h2hide>",
md"""
You can use the underscore `_` as a convenient digit separator:
```julia
1000000 == 1_000_000 # true
100000 == 100_000 # true
10000 == 10_000 # true
1000 == 1_000 # true
```
[Link to Julia docs](https://docs.julialang.org/en/v1/manual/integers-and-floating-point-numbers/#:~:text=The%20underscore).
""",
html"<h2hide>Unicode symbols</h2hide>",
md"""
You can use Unicode—and even emojis 🙃—as variable and function names. Here are some common ones we use throughout this course:
| Unicode | Code |
|:-------:|:----:|
| `τ` | `\tab` |
| `ψ` | `\psi` |
| `ℓ` | `\ell` |
| `π` | `\pi` |
| `σ` | `\sigma` |
| `Σ` | `\Sigma` |
| `θ` | `\theta` |
| `ω` | `\omega` |
| `²` | `\^2` |
| `₂` | `\_2` |
| `🍕` | `\:pizza:` |
To enter them into cells, type the above "**Code**" and hit `<TAB><TAB>` (or `<TAB><ENTER>`). Feel free to use any Unicode/emojis to your hearts desire.
See the Julia docs for more examples: [https://docs.julialang.org/en/v1/manual/unicode-input/](https://docs.julialang.org/en/v1/manual/unicode-input/)
"""
])
# ╔═╡ 41406be3-4c2d-41db-bdb4-9e2ff875cebe
highlight(md"Here's the link to the notebook on [Julia and Pluto tips](https://sisl.github.io/AA228VLectureNotebooks/media/html/julia_pluto_session.html).")
# ╔═╡ a21612a1-1092-4892-9132-629833e7c867
# ╔═╡ ec776b30-6a30-4643-a22c-e071a365d50b
md"""
## Hints
Expand the sections below for some helpful hints.
"""
# ╔═╡ 18754cc6-c089-4245-ad10-2848594e49b4
html_expand("Expand for useful interface functions.", [
html"<h2hide>Useful interface functions</h2hide>",
md"""
The following functions are provided by `StanfordAA228V.jl` that you may use.
""",
html"<h3hide><code>NominalTrajectoryDistribution</code></h3hide>",
md"""
**`NominalTrajectoryDistribution(sys::System, d::Int)::TrajectoryDistribution`** — Returns the nominal trajectory distribution for the system `sys` over depth `d`.
- Use this to evaluate the nominal likelihood of the trajectory.
""",
html"<h3hide><code>rollout</code></h3hide>",
md"""
**`rollout(sys::System; d)::Array`** — Run a single rollout of the system `sys` to a depth of `d`.
- `τ` is written as `\tau<TAB>` in code.
```julia
function rollout(sys::System; d=1)
s = rand(Ps(sys.env))
τ = []
for t in 1:d
o, a, s′ = step(sys, s) # For each rollout call, step is called d times.
push!(τ, (; s, o, a))
s = s′
end
return τ
end
```
""",
html"<h3hide><code>isfailure</code></h3hide>",
md"""
**`isfailure(ψ, τ)::Bool`** — Using the specification `ψ`, check if the trajector `τ` led to a failure.
- `ψ` is written as `\psi<TAB>` in code.
"""])
# ╔═╡ c4fa9af9-1a79-43d7-9e8d-2854652a4ea2
html_expand("Stuck? Expand for hints on what to try.", md"""
$(hint(md"Try importance sampling with your fuzzing distributions! See _Algorithm 7.3_ in the textbook.
_Other techniques_: Bayesian estimation with a good prior, multiple importance sampling, the cross-entropy estimation method, etc. (or something entirely different!?)"))""")
# ╔═╡ 6bad6e8b-c021-41d2-afbb-bcd0242138dd
# ╔═╡ dba42df0-3199-4c31-a735-b6b514703d50
md"""
## Common errors
These are some common errors you may run into.
"""
# ╔═╡ 109c3d27-2c23-48a7-9fd7-be8a1f359e55
html_expand("Expand if you're using <code>Normal</code> and/or <code>MvNormal</code>.", md"""
The univariate normal (Gaussian) distribution in Julia takes in a mean $\mu$ and standard deviation $\sigma$:
```julia
Normal(μ, σ)
```
Where the **multivariate** normal distribution takes in a mean vector $\mathbf{\mu}$ and the _covariance_ matrix $\mathbf{\Sigma}$:
```julia
MvNormal(𝛍, 𝚺)
```
Meaning, if you want a 2d diagonal multivariate normal with mean zero and standard deviation of $\sigma = 0.1$, then you can do:
```julia
MvNormal(zeros(2), 0.1^2*I)
```
where "`I`" comes from the `LinearAlgebra` module (already loaded for you).
""")
# ╔═╡ bc2f62f5-1330-46cd-bb81-411baa483488
html_expand("Expand if you're using <code>initial_state_distribution</code>, <code>disturbance_distribution</code>, or <code>depth</code>.", md"""
The `StanfordAA228V` module defines several functions that you **might be adding new methods to (i.e., new type signatures)**.
- `initial_state_distribution`
- `disturbance_distribution`
- `depth`
Say you're implementing fuzzing and you define a new `TrajectoryDistribution` type and want to create your own version of `disturbance_distribution` for your new type:
```julia
struct NewTrajectoryDistribution <: TrajectoryDistribution
some_parameter
end
```
Then you will need to use the Julia dot notation to add a method to `StanfordAA228V`:
```julia
function StanfordAA228V.disturbance_distribution(p::NewTrajectoryDistribution)
# some code
end
```
This code will add a new method for `disturbance_distribution` with the input type of `NewTrajectoryDistribution`. Make sure to add the `StanfordAA228V.` to the function names that you create which are _also_ defined in `StanfordAA228V`:
- See the [`StanfordAA228V.jl`](https://github.com/sisl/StanfordAA228V.jl/blob/d2357ba8cdaf680b207a261495d785456981c66d/src/StanfordAA228V.jl#L39-L41) file.
This is common in Julia where you need to use the funciton name qualified with the module name. Read more in the ["Namespace Management" section of the Julia docs.](https://docs.julialang.org/en/v1/manual/modules/#namespace-management)
""")
# ╔═╡ bed9a6ed-3ca0-492d-9141-a34b601ea315
highlight(md"See the FAQs page for continuously updated tips: [https://github.com/sisl/AA228V-FAQs](https://github.com/sisl/AA228V-FAQs)")
# ╔═╡ a46702a3-4a8c-4749-bd00-52f8cce5b8ee
html_half_space()
# ╔═╡ 84d9c552-2e78-4922-9ec0-4d12d0c62643
html_expand("Expand for <code>SmallSystem</code> code.", md"""
```julia
## Agent
struct NoAgent <: Agent end
(c::NoAgent)(s, a=missing) = nothing
Distributions.pdf(c::NoAgent, s, x) = 1.0
## Environment
struct SimpleGaussian <: Environment end
(env::SimpleGaussian)(s, a, xs=missing) = s
Ps(env::SimpleGaussian) = Normal(0, 1) # Initial state distribution
## Sensor
struct IdealSensor <: Sensor end
(sensor::IdealSensor)(s) = s
(sensor::IdealSensor)(s, x) = sensor(s)
Distributions.pdf(sensor::IdealSensor, s, xₛ) = 1.0
```
""")
# ╔═╡ e6e675bf-5181-4e45-8c5d-e576aa411064
html_expand("Stuck? <span style='color: crimson'>Expand for hints</span>.", hint(md"""If you're trying fuzzing as a proposal distribution, the `disturbance_distribution` for the `SmallSystem` does not apply:
```julia
D = DisturbanceDistribution((o)->Deterministic(),
(s,a)->Deterministic(),
(s)->Deterministic())
```
where
```julia
struct DisturbanceDistribution
Da # agent disturbance distribution
Ds # environment disturbance distribution
Do # sensor disturbance distribution
end
```
but the `initial_state_distribution` should be changed:
```julia
function StanfordAA228V.initial_state_distribution(p::YourFuzzingDistribution)
return Normal(SOME_MEAN, SOME_STD)
end
```
See _Example 4.3_ in the textbook for how this is applied to the pendulum.
"""))
# ╔═╡ 17fa8557-9656-4347-9d44-213fd3b635a6
Markdown.parse("""
## Small system
The system is comprised of an `agent`, environment (`env`), and `sensor`.
""")
# ╔═╡ 22feee3d-4627-4358-9937-3c780b7e8bcb
sys_small = System(NoAgent(), SimpleGaussian(), IdealSensor());
# ╔═╡ fd8c851a-3a42-41c5-b0fd-a12085543c9b
Markdown.MD(
md"""
# 1️⃣ **Small**: 1D Gaussian
The small system is a simple 1D Gaussian system.
- There are no dynamics (rollout depth $d=1$).
- There are no disturbances.
- The (initial and only) state $s$ is sampled from $\mathcal{N}(0,1)$.
""",
depth_highlight(sys_small),
md"_(Same small system as Project 1)_"
)
# ╔═╡ 6f3e24de-094c-49dc-b892-6721b3cc54ed
SmallSystem::Type = typeof(sys_small) # Type used for multiple dispatch
# ╔═╡ 45f7c3a5-5763-43db-aba8-41ef8db39a53
md"""
## Small environment
The environment is a standard normal (Gaussian) distribution $\mathcal{N}(0, 1)$.
"""
# ╔═╡ 9c1daa96-76b2-4a6f-8d0e-f95d26168d2b
ps_small = Ps(sys_small.env)
# ╔═╡ ab4c6807-5b4e-4688-b794-159e26a1599b
ψ_small = LTLSpecification(@formula □(s->s > -2));
# ╔═╡ 370a15eb-df4b-493a-af77-00914b4616ea
Markdown.parse("""
## Small specification \$\\psi\$
The specification \$\\psi\$ (written `\\psi<TAB>` in code) indicates what the system should do:
\$\$\\psi(\\tau) = \\square(s > $(ψ_small.formula.ϕ.c))\$\$
i.e., "the state \$s\$ in the trajectory \$\\tau\$ should _always_ (\$\\square\$) be greater than \$$(ψ_small.formula.ϕ.c)\$, anything else is a failure."
""")
# ╔═╡ 166bd412-d433-4dc9-b874-7359108c0a8b
Markdown.parse("""
A failure is highly unlikely given that the probability of failure is:
\$\$\\begin{align}
P(\\neg\\psi(\\tau)) &= 1 - P(\\psi(\\tau)) \\\\
&= 1 - P(s > $(ψ_small.formula.ϕ.c)) \\\\
&= P(s < $(ψ_small.formula.ϕ.c)) \\approx $(round(cdf(ps_small, ψ_small.formula.ϕ.c), sigdigits=4))
\\end{align}\$\$
where \$\\neg\\psi(\\tau)\$ indicates that the specification was violated and \$P\$ is the _cumulative distribution function_ (`cdf` in Julia).
""")
# ╔═╡ cf42542a-f519-478d-a57e-652c420f4ed5
# ╔═╡ b6573f2b-52e5-4881-91e7-759d628bf7fe
md"""
## Probability vs. likelihood
In _Project 1_, you found trajectories that had high **_likelihoods_**, i.e., $p_\theta(\tau)$ where $\theta$ are the parameters of the system (e.g., the mean and std of the simple Gaussian system).
The likelihood measures the **_probability density_** of the "data" (in our case the trajectory $\tau$), given the set of parameters $\theta$ (using `pdf`). Likelihood values are non-negative and must integrate to one:
$$\begin{equation}
\int_\tau p_\theta(\tau) \mathrm{d}\tau = 1
\end{equation}$$
where likelihoods may be much larger than one (e.g., as in the inverted pendulum problem).
"""
# ╔═╡ 9d051b1b-dcf4-418c-988c-37561a10f485
md" $γ=$ $(@bind γ Slider(-4:0.1:4, show_value=true, default=-1))"
# ╔═╡ a67ea28d-3927-40f9-a049-b9faeb0cfa58
ψ_cdf = LTLSpecification(@eval @formula □(s->s > $γ));
# ╔═╡ bb4b252d-fc49-49d9-a31b-5092d73dc244
Markdown.parse("""
The specification \$\\psi(\\tau) = \\square(s > γ)\$ for the chosen \$γ\$ is \$\\psi(\\tau) = \\square(s > $(ψ_cdf.formula.ϕ.c))\$ where:
""")
# ╔═╡ 9ea24b88-03b3-434f-a703-af197f754dcd
Markdown.parse("""
\$\$\\begin{align}
p(s) &\\approx $(round(pdf(ps_small, ψ_cdf.formula.ϕ.c), sigdigits=4)) \\tag{probability density} \\\\
P(s < γ) &\\approx $(round(cdf(ps_small, ψ_cdf.formula.ϕ.c), sigdigits=4)) \\tag{cumulative probability}
\\end{align}\$\$
""")
# ╔═╡ 6ea3ba83-ade5-4a19-ad60-b3fe5c56e3b8
md"""
### Probability of an event
In _this_ project, we are now interested in measuring the **_probability_** of an event, where the probability is a value between **zero** and **one**.
If you have Monte Carlo samples of trajectories from your system, then the naive way to estimate the probability of an event (a failure in our case) would be:
$$\begin{equation}
\hat{P}_\text{event} = \frac{\text{number of times that event occurred}}{\text{total number of samples}}
\end{equation}$$
But Monte Carlo estimation can be _super_ inefficient. Therefore, this project is designed for you to explore more sophisticated algorithms to efficiently estimate the probabilty of an event.
_(Note for simple Gaussian problems, we can just use the `cdf` to evaluate this probability exactly, but we randomize the threshold so you cannot access it directly—just to make things more interesting)_
"""
# ╔═╡ cd2adeb4-493f-4d37-8e0d-9501637c6000
# ╔═╡ 42456abf-4930-4b01-afd1-fce3b4881e28
Markdown.MD(
HTML("<h2 id='baseline'>Baseline: Monte Carlo estimate</h2>"),
md"""
The Monte Carlo baseline algorithm will sample $m$ trajectories of depth $d$ from the nominal trajectory distribution and count how many trajectories were a failure.
The frequentist estimate of the failure probability is then computed simply as:
$$\begin{equation}
\hat{P}_\text{fail} = \frac{\text{number of failures}}{\text{total number of trajectories}}
\end{equation} = \mathbb{E}_i \Big[ \mathbb{1}\big(\neg\psi(\tau_i)\big) \Big]$$
where $\neg\psi(\tau_i)$ checks if trajectory $\tau_i$ is a failure, `isfailure(τᵢ)`, and $\mathbb{1}$ is the indicator function (but conveniently, Julia treats `true` as `1` and `false` as `0`).
_This is equivalent to `DirectEstimation` (algorithm 7.1)._
""")
# ╔═╡ cc11217f-e070-4d20-8ebe-18e7eb977487
highlight(md"""**Note**: You can access the number of `step` calls via `stepcount()`""")
# ╔═╡ 92f20cc7-8bc0-4aea-8c70-b0f759748fbf
Markdown.parse("""
## ⟶ **Task (Small)**: Estimate failure probability
Please fill in the following `estimate_probability` function.
""")
# ╔═╡ a003beb6-6235-455c-943a-e381acd00c0e
start_code()
# ╔═╡ c494bb97-14ef-408c-9de1-ecabe221eea6
end_code()
# ╔═╡ e2418154-4471-406f-b900-97905f5d2f59
html_quarter_space()
# ╔═╡ 1789c8b5-b314-4aba-ad44-555be9a85984
md"""
# 📊 Small Tests
We'll automatically test your `estimate_probability(::SmallSystem, ψ)` function below.
**Note**: The next three tests are _only_ local validation tests.
_The **graded** tests to be submitted to Gradescope are located [below](#graded-test)._
"""
# ╔═╡ 535261e3-4cb3-4b0b-954d-7452b2a91b5d
begin
ψ_small_different = LTLSpecification(@formula □(s->s < 2))
md"""
## Different failure threshold
Let's test a different failure threshold.
"""
end
# ╔═╡ 02a4098f-a1ee-433c-aea7-8e8fc8a65088
highlight(md"**Note**: You might fail on some of these specifications. Don't worry, as long as your _average_ estimate over different $\psi$ values is better than random, then the **_graded_** test should pass.")
# ╔═╡ ce99f0cc-5fe8-42c2-af78-ac7211b6b699
@bind rerun_rand_small Button("Click to rerun random test.")
# ╔═╡ fda151a1-5069-44a8-baa1-d7903bc89797
html_space()
# ╔═╡ e61657ef-961b-42af-89d7-e242d477ba1f
html_expand("Expand for <code>MediumSystem</code> code.", md"""
```julia
## Agent
struct ProportionalController <: Agent
k
end
(c::ProportionalController)(s, a=missing) = c.k' * s
## Environment
@with_kw struct InvertedPendulum <: Environment
m::Float64 = 1.0
l::Float64 = 1.0
g::Float64 = 10.0
dt::Float64 = 0.05
ω_max::Float64 = 8.0
a_max::Float64 = 2.0
end
function (env::InvertedPendulum)(s, a, xs=missing)
θ, ω = s[1], s[2]
dt, g, m, l = env.dt, env.g, env.m, env.l
a = clamp(a, -env.a_max, env.a_max)
ω = ω + (3g / (2 * l) * sin(θ) + 3 * a / (m * l^2)) * dt
θ = θ + ω * dt
ω = clamp(ω, -env.ω_max, env.ω_max)
return [θ, ω]
end
# Initial state distribution
Ps(env::InvertedPendulum) = MvNormal(zeros(2), diagm([(π/32)^2, 0.5^2]))
## Sensor
struct AdditiveNoiseSensor <: Sensor
Do
end
(sensor::AdditiveNoiseSensor)(s) = sensor(s, rand(Do(sensor, s)))
(sensor::AdditiveNoiseSensor)(s, x) = s + x
Do(sensor::AdditiveNoiseSensor, s) = sensor.Do
Os(sensor::AdditiveNoiseSensor) = I
```
""")
# ╔═╡ 1cef8f2a-faee-4709-a0e9-6e5f2e3ce4cd
html_expand("Stuck? <span style='color: crimson'>Expand for hints</span>.", hint(md"""If you're trying fuzzing as a proposal distribution, the `disturbance_distribution` for the `MediumSystem` applies disturbances to the _sensor_:
```julia
D = DisturbanceDistribution((o)->Deterministic(),
(s,a)->Deterministic(),
(s)->MvNormal(SOME_MEAN_VECTOR, SOME_COVARIANCE))
```
where
```julia
struct DisturbanceDistribution
Da # agent disturbance distribution
Ds # environment disturbance distribution
Do # sensor disturbance distribution
end
```
See _Example 4.3_ in the textbook.
"""))
# ╔═╡ d18c2105-c2af-4dda-8388-617aa816a567
Markdown.parse("""
## Medium system
An inverted pendulum comprised of a `ProportionalController` with an `AdditiveNoiseSensor`.
""")
# ╔═╡ 77637b5e-e3ce-4ecd-90fc-95611af18002
sys_medium = System(
ProportionalController([-15.0, -8.0]),
InvertedPendulum(),
AdditiveNoiseSensor(MvNormal(zeros(2), 0.1^2*I))
);
# ╔═╡ 8c78529c-1e00-472c-bb76-d984b37235ab
Markdown.MD(
md"""
# 2️⃣ **Medium**: Inverted Pendulum
The medium system is a swinging inverted pendulum.
- It uses a proportional controller to keep it upright.
- The state is comprised of the angle $\theta$ and angular velocity $\omega$ making $s = [\theta, \omega]$
- Actions are left/right adjustments in the range $[-2, 2]$
- Disturbances $x$ are treated as additive noise: $x \sim \mathcal{N}(\mathbf{0}, 0.1^2I)$
""",
depth_highlight(sys_medium),
md"_(Same medium system as Project 1)_"
)
# ╔═╡ c4c0328d-8cb3-41d5-9740-0197cbf760c2
MediumSystem::Type = typeof(sys_medium) # Type used for multiple dispatch
# ╔═╡ b1e9bd40-a401-4630-9a1f-d61b276e72f7
md"""
## Medium specification $\psi$
The inverted pendulum specification $\psi$ indicates what the system should do:
$$\psi(\tau) = \square\big(|\theta| < \pi/4\big)$$
i.e., "the absolute value of the pendulum angle $\theta$ (first element of the state $s$) in the trajectory $\tau$ should _always_ ($\square$) be less than $\pi/4$, anything else is a failure."
"""
# ╔═╡ fe272c1b-421c-49de-a513-80c7bcefdd9b
ψ_medium = LTLSpecification(@formula □(s -> abs(s[1]) < π / 4));
# ╔═╡ bac5c489-553c-436f-b332-8a8e97126a51
html_quarter_space()
# ╔═╡ 1da9695f-b7fc-46eb-9ef9-12160246018d
Markdown.parse("""
## ⟶ **Task (Medium)**: Estimate failure probability
Please fill in the following `estimate_probability` function.
""")
# ╔═╡ 0606d827-9c70-4a79-afa7-14fb6b806546
start_code()
# ╔═╡ 759534ca-b40b-4824-b7ec-3a5c06cbd23e
end_code()
# ╔═╡ 7987c20d-68e8-441b-bddc-3f0ae7c3591d
html_quarter_space()
# ╔═╡ da2d692a-8378-435e-bd6b-c0e65caef542
md"""
# 📊 Medium Test
We'll automatically test your `estimate_probability(::MediumSystem, ψ)` function below.
"""
# ╔═╡ 60ab8107-db65-4fb6-aeea-d4978aed77bd
html_space()
# ╔═╡ 155f2bfe-badd-46fe-9d1e-b73099be5e77
html_expand("Expand for <code>LargeSystem</code> code.", md"""
```julia
## Agent
struct InterpAgent <: Agent
grid::RectangleGrid
Q
end
(c::InterpAgent)(s) = argmax([interpolate(c.grid, q, s) for q in c.Q])
(c::InterpAgent)(s, x) = c(s)
Distributions.pdf(c::InterpAgent, o, xₐ) = 1.0
## Environment
@with_kw struct CollisionAvoidance <: Environment
ddh_max::Float64 = 1.0 # [m/s²]
𝒜::Vector{Float64} = [-5.0, 0.0, 5.0] # [m/s]
Ds::Sampleable = Normal(0,1.5)
end
# NominalTrajectoryDistribution on the environment (D.Ds)
Ds(env::CollisionAvoidance, s, a) = env.Ds
function (env::CollisionAvoidance)(s, a, x)
a = env.𝒜[a]
h, dh, a_prev, τ = s
h = h + dh
if a != 0.0
if abs(a - dh) < env.ddh_max
dh += a
else
dh += sign(a - dh) * env.ddh_max
end
end
a_prev = a
τ = max(τ - 1.0, -1.0)
return [h, dh + x, a_prev, τ]
end
(env::CollisionAvoidance)(s, a) = env(s, a, rand(Ds(env, s, a)))
# Initial state distribution
Ps(env::CollisionAvoidance) = product_distribution(
Uniform(-100, 100), # Initial h
Uniform(-10, 10), # Initial dh
DiscreteNonParametric([0], [1.0]), # Initial a_prev
DiscreteNonParametric([40], [1.0]) # Initial τ
)
## Sensor
struct IdealSensor <: Sensor end
(sensor::IdealSensor)(s) = s
(sensor::IdealSensor)(s, x) = sensor(s)
Distributions.pdf(sensor::IdealSensor, s, xₛ) = 1.0
```
""")
# ╔═╡ 82d11960-a4b1-4e7e-9c7e-783351c9bcd5
html_expand("Stuck? <span style='color: crimson'>Expand for hints</span>.", hint(md"""If you're trying fuzzing as a proposal distribution, the `disturbance_distribution` for the `LargeSystem` applies disturbances to the _environment_:
```julia
D = DisturbanceDistribution((o)->Deterministic(),
(s,a)->Normal(SOME_MEAN, SOME_STD),
(s)->Deterministic())
```
where
```julia
struct DisturbanceDistribution
Da # agent disturbance distribution
Ds # environment disturbance distribution
Do # sensor disturbance distribution
end
```
See _Example 4.3_ in the textbook for how this is applied to the pendulum.
"""))
# ╔═╡ 7d054465-9f80-4dfb-9b5f-76c3977de7cd
Markdown.parse("""
## Large system
An aircraft collision avoidance system that uses an interpolated lookup-table policy.
""")
# ╔═╡ 1ec68a39-8de9-4fd3-be8a-26cf7706d1d6
begin
local grid, Q = load_cas_policy(joinpath(@__DIR__, "cas_policy.bson"))
cas_agent = InterpAgent(grid, Q)
cas_env = CollisionAvoidance(Ds=Normal(0, 1.5))
cas_sensor = IdealSensor()
sys_large = System(cas_agent, cas_env, cas_sensor)
LargeSystem::Type = typeof(sys_large) # Type used for multiple dispatch
end
# ╔═╡ 9f739929-1cd3-4935-b229-ae3aeac7e131
begin
ThisProject = Project2
max_steps(sys::SmallSystem) = 200
max_steps(sys::MediumSystem) = 10_000
max_steps(sys::LargeSystem) = 200_000
num_seeds(sys::SmallSystem) = 20
num_seeds(sys::MediumSystem) = 5
num_seeds(sys::LargeSystem) = 3
end;
# ╔═╡ 60f72d30-ab80-11ef-3c20-270dbcdf0cc4
Markdown.parse("""
**Task**: Efficiently estimating the failure probability using \$n\$ total calls to the system `step` function.
- **Small system**: 1D Gaussian \$\\mathcal{N}(0,1)\$: \$n=$(format(max_steps(sys_small); latex=true))\$ `step` calls and \$$(num_seeds(sys_small))\$ seeds.
- **Medium system**: Swinging inverted pendulum: \$n=$(format(max_steps(sys_medium); latex=true))\$ `step` calls and \$$(num_seeds(sys_medium))\$ seeds.
- **Large system**: Aircraft collision avoidance system (CAS): \$n=$(format(max_steps(sys_large); latex=true))\$ `step` calls and \$$(num_seeds(sys_large))\$ seeds.
_(Same systems as Project 1)_
Your job is to write the following function that returns the estimated failure probability:
```julia
estimate_probability(sys, ψ; n)::Float64
```
and get a better estimate of the failure probability than a random baseline.
""")
# ╔═╡ c2ae204e-dbcc-453a-81f5-791ba4be39db
@tracked function estimate_probability_baseline(sys, ψ; n=max_steps(sys))
d = get_depth(sys)
m = n ÷ d # Get num. rollouts (\div for ÷)
pτ = NominalTrajectoryDistribution(sys, d) # Nominal trajectory distribution
τs = [rollout(sys, pτ; d) for _ in 1:m] # Rollout with pτ, m*d steps
return mean(isfailure.(ψ, τs)) # Frequentist estimate of P(fail)
end
# ╔═╡ 254956d0-8f58-4e2b-b8a9-5dd10dd074a2
function run_baseline(sys::System, ψ; seed=4)
Random.seed!(seed)
pfail = estimate_probability_baseline(sys, ψ)
n = stepcount()
d = get_depth(sys)
return (pfail=pfail, n=n, m=n÷d) # return these variables as a NamedTuple
end
# ╔═╡ c8c1a321-39c8-4a78-bbcf-13663243c457
Markdown.MD(
Markdown.parse("""
### Baseline comparison
Unlike _Project 1_, in this project you will be given the same number of `step` calls as the baselines:
\$\$\\begin{equation}
n_\\text{steps} = $(max_steps(sys_small)) \\tag{for the small system}
\\end{equation}\$\$
Reminder that the number of `step` calls \$n\$ is equal to the number of rollouts \$m\$ for the small system because the rollout depth is \$d=1\$.
"""),
highlight(md"**Note**: To pass the tests, your $\hat{P}_\text{fail}$ estimate must be better than the baseline on average.")
)
# ╔═╡ db5c210a-e783-40bf-892d-58a9fe5dfb23
Markdown.parse("""
## Average performance
Because most estimation algorithms are stochastic, we will test your implemented algorithms to get the average \$\\hat{P}_\\text{fail}\$ over \$K = $(num_seeds(sys_small))\$ _random number generator_ (RNG) seeds. Note this specific number of seeds is for the small system, please refer to the other sections for their prescribed number of seeds.
_We will report the mean and standard deviation of your estimates._
**Your mean estimate should be better than random.**
""")
# ╔═╡ b60c518f-41bb-4abd-b573-d3f8d29f60de
function aggregate_performance(alg::Function, sys, ψ; seeds=1:num_seeds(sys))
estimates = []
for seed in seeds
Random.seed!(seed)
pfail = alg(sys, ψ)
push!(estimates, pfail)
end
return estimates
end
# ╔═╡ 402c0eaa-727f-4c54-89ec-64c3dfb8002c
fbaseline(sys,ψ,seeds) =
aggregate_performance(estimate_probability_baseline, sys, ψ; seeds);
# ╔═╡ 782a2696-41a7-4bcf-8002-058d18d82840
begin
baseline_μₘ, baseline_σₘ =
aggregate_performance(estimate_probability_baseline, sys_medium, ψ_medium);
Markdown.parse("""
### Example aggregate performance for the baseline
The aggregate estimated failure probability using the Monte Carlo baseline is:
\$\$\\begin{equation}
\\hat{P}_\\text{fail}^{(\\text{baseline})} \\approx $(baseline_μₘ) \\pm $(round(baseline_σₘ; sigdigits=4))
\\end{equation}\$\$
_Note that we will test over different RNG seeds than those defaulted above._
""")
end
# ╔═╡ fc2d34da-258c-4460-a0a4-c70b072f91ca
@small function estimate_probability(sys::SmallSystem, ψ; n=max_steps(sys))
# TODO: WRITE YOUR CODE HERE
end
# ╔═╡ 307afd9c-6dac-4a6d-89d7-4d8cabfe3fe5
Markdown.MD(
md"""
$(@bind rerun_small LargeCheckBox(text="⟵ Click to re-run the <code>SmallSystem</code> evaluation."))""",
Markdown.parse("""
↑ This will re-run **`estimate_probability(::SmallSystem, ψ)`** and re-save **`$(get_filename(sys_small, ThisProject))`**
_Uncheck this to load results from the file._
""")
)
# ╔═╡ f3cf88ca-8569-4e42-a9fc-436637b82364
Markdown.parse("""
## Average performance
The failure probability \$\\hat{P}_\\text{fail}\$ is averaged over \$$(num_seeds(sys_medium))\$ _random number generator_ (RNG) seeds.
_We will report the mean and standard deviation of your estimates._
**Your mean estimate should be better than random.**
""")
# ╔═╡ cb7b9b9f-59da-4851-ab13-c451c26117df
@medium function estimate_probability(sys::MediumSystem, ψ; n=max_steps(sys))
# TODO: WRITE YOUR CODE HERE
end
# ╔═╡ 38f26afd-ffa5-48d6-90cc-e3ec189c2bf1
Markdown.MD(
md"""
$(@bind rerun_medium LargeCheckBox(text="⟵ Click to re-run the <code>MediumSystem</code> evaluation."))""",
Markdown.parse("""
↑ This will re-run **`estimate_probability(::MediumSystem, ψ)`** and re-save **`$(get_filename(sys_medium, ThisProject))`**
_Uncheck this to load results from the file._
""")
)
# ╔═╡ 4eeaa9ae-eac5-478a-aca5-82de3dda24f7
submission_details(@bind(directory_trigger, OpenDirectory(@__DIR__)), ThisProject,
[SmallSystem, MediumSystem, LargeSystem])
# ╔═╡ 0c520f93-49ce-45eb-899d-a31105d856c8
if directory_trigger
@info "Opening local directory..."
sleep(1)
end
# ╔═╡ aa0c4ffc-d7f0-484e-a1e2-7f6f92a3a53d
Markdown.MD(
Markdown.parse("""
# 3️⃣ **Large**: Aircraft Collision Avoidance
The large system is an aircraft collision avoidance system (CAS).
- It uses an interpolated lookup-table policy.
- The state is comprised of the relative altitude (m) \$h\$, the relative vertical rate \$\\dot{h}\$ (m/s), the previous action \$a_\\text{prev}\$, and the time to closest point of approach \$t_\\text{col}\$ (sec): \$s = [h, \\dot{h}, a_\\text{prev}, t_\\text{col}]\$
- Actions are \$a \\in [-5, 0, 5]\$ vertical rate changes.
- Disturbances \$x\$ are applied to \$\\dot{h}\$ as environment noise: \$x \\sim \\mathcal{N}(0, 1.5)\$
- Finite horizon (i.e., rollout depth) of \$d=$(get_depth(sys_large))\$ for \$t_\\text{col}\$ from \$40-0\$ seconds.
"""),
depth_highlight(sys_large),
md"_(Same large system as Project 1)_"
)
# ╔═╡ d23f0299-981c-43b9-88f3-fb6e07927498
md"""
## Large environment
The collision avoidance system has disturbances applied to the relative vertical rate variable $\dot{h}$ of the state (i.e., environment disturbances).
$$\dot{h} + x \quad \text{where} \quad x \sim \mathcal{N}(0, 1.5)$$
"""
# ╔═╡ 641b92a3-8ff2-4aed-8482-9fa686803b68
cas_env.Ds
# ╔═╡ be426908-3fee-4ecd-b054-2497ce9a2e50
md"""
## Large specification $\psi$
The collision avoidance system specification $\psi$ indicates what the system should do:
$$\psi(\tau) = \square_{[41]}\big(|h| > 50\big)$$
i.e., "the absolute valued relative altitude $h$ (first element of the state $s$) in the trajectory $\tau$ should _always_ ($\square$) be greater than $50$ meters at the end of the encounter ($t=41$), anything else is a failure."
"""
# ╔═╡ 258e14c4-9a2d-4515-9a8f-8cd96f31a6ff
ψ_large = LTLSpecification(@formula □(41:41, s->abs(s[1]) > 50));
# ╔═╡ aee22151-51de-426b-8478-6a04284a4888
Markdown.parse("""
## Average performance
The failure probability \$\\hat{P}_\\text{fail}\$ is averaged over \$$(num_seeds(sys_large))\$ _random number generator_ (RNG) seeds.
_We will report the mean and standard deviation of your estimates._
**Your mean estimate should be better than random.**
""")
# ╔═╡ c22f039c-d7bb-4f7f-9284-cf66906f6390
begin
baseline_μₗ, baseline_σₗ =
aggregate_performance(estimate_probability_baseline, sys_large, ψ_large)
Markdown.parse("""
### Example aggregate performance for the baseline
The aggregate estimated failure probability using the Monte Carlo baseline is:
\$\$\\begin{equation}
\\hat{P}_\\text{fail}^{(\\text{baseline})} \\approx $(baseline_μₗ) \\pm $(baseline_σₗ)
\\end{equation}\$\$
Notice the high standard deviation! This is because failures are extremely rare for the CAS problem.
_Note that we will test over different RNG seeds than those defaulted above._
""")
end
# ╔═╡ e3d6fdf1-3a9e-446b-8482-49d6f64b652e
html_quarter_space()
# ╔═╡ 23fd490a-74d2-44b4-8a12-ea1460d95f85
Markdown.parse("""
## ⟶ **Task (Large)**: Estimate failure probability
Please fill in the following `estimate_probability` function.
""")
# ╔═╡ 18a70925-3c2a-4317-8bbc-c2a096ec56d0
start_code()
# ╔═╡ 4c5210d6-598f-4167-a6ee-93bceda7223b
end_code()
# ╔═╡ 3471a623-16af-481a-8f66-5bd1e7890188
@large function estimate_probability(sys::LargeSystem, ψ; n=max_steps(sys))
# TODO: WRITE YOUR CODE HERE