Merge branch 'develop' into release

NASA-SW-VnV · Apr 22, 2024 · c394c7e · c394c7e
2 parents a8802ee + 2f9abe6
commit c394c7e
Show file tree

Hide file tree

Showing 27 changed files with 429 additions and 65,905 deletions.
diff --git a/Project.toml b/Project.toml
@@ -6,9 +6,15 @@ version = "0.1.0"
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
 Bijections = "e2ed5e7c-b2de-5872-ae92-c73ca462fb04"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -17,17 +23,10 @@ Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
-julia = "1.7"
-BSON = "0.3.4"
-Bijections = "0.1.3"
-CommonRLInterface = "0.3.1"
-DataStructures = "0.18.11"
-Distributions = "0.25.48"
-ProgressMeter = "1.7.1"
-Scratch = "1.1.0"
-Suppressor = "0.2.0"
+julia = "1.10"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 ![AdaStress](docs/logo.svg)
 
-AdaStress is a software package that implements the Adaptive Stress Testing (AST) framework, which determines the likeliest failures for a system under test.
+AdaStress is a software package that implements and extends the Adaptive Stress Testing (AST) framework, which determines the likeliest failures for a system under test.
 
 AdaStress provides three primary services:
 - Interfaces between user simulations and the AST framework
@@ -33,8 +33,7 @@ AdaStress provides two basic simulation interfaces, **black-box** and **gray-box
 Your simulation must inherit from the `BlackBox` or `GrayBox` type and implement the methods found in `src/interface/BlackBox.jl` or `src/interface/GrayBox.jl`.
 
 ## Further information
-For more detailed instructions on using AdaStress, see the [complete documentation](./docs/main.md). Example notebooks can be found in the `examples` directory. For background on original AST formulation, see
-> Lee, Ritchie, Ole J. Mengshoel, Anshu Saksena, Ryan W. Gardner, Daniel Genin, Joshua Silbermann, Michael Owen, and Mykel J. Kochenderfer. "Adaptive stress testing: Finding likely failure events with reinforcement learning." Journal of Artificial Intelligence Research 69 (2020): 1165-1201.
+For more detailed instructions on using AdaStress, see the [complete documentation](./docs/main.md). Example notebooks can be found in the `examples` directory. For background on the AST formulation, see the [original paper](https://doi.org/10.1613/jair.1.12190).
 
 ## License
 AdaStress has been released under the NASA Open Source Agreement version 1.3, as detailed [here](docs/LICENSE.pdf).

diff --git a/docs/main.md b/docs/main.md
@@ -4,17 +4,16 @@
 
 ---
 
-- [Maintainers](#maintainers)
-- [Description](#description)
-- [Prerequisites](#prerequisites)
-- [Architecture](#architecture)
-- [Problem setup](#problem-setup)
-- [Interface](#interface)
-- [Serialization interface](#serialization-interface)
-- [Submodule management](#submodule-management)
-- [Solvers](#solvers)
-- [Analysis](#analysis)
-- [Acknowledgements](#acknowledgments)
+[Maintainers](#maintainers)\
+[Description](#description)\
+[Prerequisites](#prerequisites)\
+[Architecture](#architecture)\
+[Problem setup](#problem-setup)\
+[Interface](#interface)\
+[Serialization interface](#serialization-interface)\
+[Solvers](#solvers)\
+[Analysis](#analysis)\
+[Acknowledgments](#acknowledgments)
 
 ---
 
@@ -24,7 +23,7 @@
 
 ## Description
 
-AdaStress is a software package that implements the [adaptive stress testing (AST) framework](https://doi.org/10.1613/jair.1.12190), which determines the likeliest failures for a system under test.
+AdaStress is a software package that implements and extends the [adaptive stress testing (AST) framework](https://doi.org/10.1613/jair.1.12190), which determines the likeliest failures for a system under test.
 
 AdaStress provides three primary services:
 - Interfaces between user simulations and the AST framework
@@ -192,49 +191,6 @@ The serialization capabilities also make it easier to interact with other progra
 
 An `ASTServer` and `ASTClient` can be created separately and configured to exchange a minimal amount of information to enable stress-testing. This exchange can be further encrypted in various ways, in order to obscure the system under test from the stress-testing agent. For an example of serialized stress-testing, see the notebooks in `examples/pedestrian`.
 
-## Submodule management
-
-The submodule manager allows optional and experimental features with heavy dependencies to be made available without increasing the loading time of the base package. The user can selectively enable and disable these submodules as needed. In the background, the submodule manager maintains an internal project environment with a minimal set of necessary dependencies, avoiding the need to load unused packages.
-
-This system is made necessary by certain limitations of the language, which does not currently support optional dependencies. A common solution involves creating multiple separate packages to extend a base package; however, we consider this approach somewhat of an anti-pattern, and have chosen not to employ it here. In future versions of AdaStress, the submodule system may be removed if a suitable alternative is possible.
-
-### Using submodules
-
-Submodules are managed through the following API:
-
-> - **`AdaStress.submodules()`**
-> List all available submodules.
-> - **`AdaStress.enabled()`**
-> List enabled submodules.
-> - **`AdaStress.enable(submodule)`**
-> Enable submodule(s). Accepts string or vector of strings. With zero arguments defaults to all associated submodules. Takes effect immediately.
-> - **`AdaStress.disable(submodule)`**
-> Disable submodule(s). Accepts string or vector of strings. With zero arguments defaults to all enabled submodules. Takes effect after Julia restart.
-> - **`AdaStress.load()`**
-> Load enabled submodules (necessary after Julia restart). Takes effect immediately.
-> - **`AdaStress.clean()`**
-> Forcibly remove temporary environment, purging all enabled submodules. Only necessary if submodule manager is corrupted and `disable` cannot restore functionality. Takes effect after Julia restart.
-
-Enabling a submodule can take several seconds, particularly the first time. Due to current limitations of the language, previously enabled submodules cannot be automatically loaded when a new Julia session is launched. The user should use the `load` command for this, as in the following example. In the first session, it is necessary to run
-
-> ```
-> julia> using AdaStress
-> julia> AdaStress.enable("SoftActorCritic")
-> ```
-while in later sessions, the user may simply run
-> ```
-> julia> using AdaStress
-> julia> AdaStress.load()
-> ```
-
-### Multiprocessing
-
-Due to current bugs in the language, many processes related to code loading and environment management are not truly atomic. This can lead to problems when submodules are used in multiprocessing, as occurs with policy-value verification analysis. In such cases, care should be taken when invoking the submodule manager API asynchronously. For an example of loading submodules on multiple processes, see the notebook `examples/pvv`.
-
-### Creating submodules
-
-Custom submodules are essentially regular Julia packages that reside within the AdaStress directory tree, complete with a UUID and `Project.toml` file. Submodules are associated with AdaStress via the `exclude` command, similarly to how source files are associated via `include`.
-
 ## Solvers
 
 A solver object is a standalone entity representing an algorithm and its parameters. A solver can be applied to an `ASTMDP` or a function that generates an `ASTMDP`, producing a `Result` object, as in
@@ -276,18 +232,34 @@ For an example of a problem solved with MCTS, see the notebook `examples/walk1d`
 
 Global solvers aim to produce an adversarial policy mapping from simulator state to environment instance. The output of the solver is a function that takes as input an observation of the system and returns an action. In this way, failure trajectories can be produced from any given initialization. This opens the door to a richer analysis of the system's weaknesses.
 
-#### Soft actor-critic
+#### Q-learning
 
->This feature is contained in a submodule, and must be explicitly enabled.
+Q-learning is a classic reinforcement learning algorithm that uses a table-based policy to map states to optimal actions. Exploration is driven by an epsilon-greedy action selection approach. The simplicity of the QL algorithm makes it a useful baseline for more advanced methods.
+
+| Parameter | Type | Default | Description |
+| - | - | - | - |
+| `state_mins` | `Vector{Float64}` | `[0.0]` | Minimum values of state vector |
+| `state_maxs` | `Vector{Float64}` | `[1.0]` | Maximum values of state vector |
+| `state_divs` | `Vector{Int64}` | `[10]` | State space grid size |
+| `act_mins` | `Vector{Float64}` | `[-3.0]` | Minimum values of actions (normalized) |
+| `act_maxs` | `Vector{Float64}` | `[3.0]` | Maximum values of actions (normalized) |
+| `act_divs` | `Vector{Int64}` | `[10]` | Action space grid size |
+| `num_episodes` | `Int64` | `1000` | Number of episodes |
+| `alpha` | `Float64` | `0.1` | Learning rate |
+| `gamma` | `Float64` | `1.0` | Discount factor |
+| `eps` | `Float64` | `0.25` | Exploration parameter |
+| `reverse_update` | `Bool` | `true` | Update table in time-reverse order |
+
+#### Soft actor-critic
 
 Soft actor-critic (SAC) is a deep reinforcement learning algorithm that simultaneously learns a value function and a policy for the `ASTMDP`. Both take the form of neural networks, which can be used to generate failures online in real-time or analyze system properties offline. SAC offers the following tunable parameters:
 
 | Parameter | Type | Default | Description |
 | - | - | - | - | 
 | `obs_dim` | `Int64` | none | Dimension of observation space | 
 | `act_dim` | `Int64` | none | Dimension of action space | 
-| `act_mins` | `Vector{Float64}` | none | Minimum values of actions | 
-| `act_maxs` | `Vector{Float64}` | none | Maximum values of actions | 
+| `act_mins` | `Vector{Float64}` | none | Minimum values of actions (normalized) | 
+| `act_maxs` | `Vector{Float64}` | none | Maximum values of actions (normalized) | 
 | `gamma` | `Float64` | `0.999` | Discount factor | 
 | `max_buffer_size` | `Int64` | `100000` | Maximum number of timesteps in buffer | 
 | `hidden_sizes` | `Vector{Int}` | `[100,100,100]` | Dimensions of hidden layers | 
@@ -327,8 +299,6 @@ The analysis module provide methods to further analyze results.
 
 ### Policy-value verification
 
->This feature is contained in a submodule, and must be explicitly enabled.
-
 Policy-value verification (PVV) is an experimental method of analyzing the output of a global solver. It assembles the policy network and value network (or ensemble of value networks) into a single value function over the state space. Then, given a set condition on the value function, the algorithm uses an adaptive refinement process to classify regions of state space that provably satisfy the condition, violate the condition, or are unprovable at the given tolerance.
 
 As a matter of ongoing research, requirements concerning the safety of the system can be linked to conditions on the value function. For instance, a requirement that the probability of failure not exceed $10^{-9}$ from a set of initial states (given some modeled environmental stochasticity) translates to a constraint on the value function. The validity and practicality of this analysis are largely dependent on the learning process and are still uncertain. Nonetheless, the approach can currently generate *approximate* artifacts that may be useful for casual and nonrigorous analysis of system performance. 
@@ -341,4 +311,4 @@ For an example of a problem analyzed with PVV, see the notebook `examples/pvv`.
 
 The adaptive stress testing framework was proposed and developed by Ritchie Lee during his PhD under the supervision of Prof. Mykel Kochenderfer (Stanford University). Ritchie directed the creation of AdaStress and was instrumental in shaping our particular approach to this problem.
 
-Some of the basic nomenclature in AdaStress is borrowed from the package `POMDPStressTesting.jl`, namely the `GrayBox` and `BlackBox` terminology. Note that the usage and interpretation of these terms differs between the packages. Code that is compatible with one package cannot immediately be used with the other without modification.
+Some of the basic nomenclature in AdaStress is borrowed from the package `POMDPStressTesting.jl`, namely the `GrayBox` and `BlackBox` terminology. Note that the usage and interpretation of these terms differ between the packages. Code that is compatible with one package cannot immediately be used with the other without modification.
diff --git a/examples/cartpole/Project.toml b/examples/cartpole/Project.toml
@@ -1,11 +1,7 @@
 [deps]
 AdaStress = "f8632b6a-8763-4da0-bfaf-5f7707adef25"
-BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
-Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-ReinforcementLearning = "158674fc-8238-5cab-b5ba-03dfc80d1318"
-StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+ReinforcementLearningExperiments = "6bd458e5-1694-412f-b601-3a888375c491"