diff --git a/Project.toml b/Project.toml index a0be6395f..9891f3cfb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NonlinearSolve" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" authors = ["SciML"] -version = "2.10.0" +version = "3.0.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -33,12 +33,16 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" FastLevenbergMarquardt = "7a0df574-e128-4d35-8cbd-3d84502bf7ce" LeastSquaresOptim = "0fc2ff8b-aaa3-5acd-a817-1944a5e08891" +MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9" +NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [extensions] NonlinearSolveBandedMatricesExt = "BandedMatrices" NonlinearSolveFastLevenbergMarquardtExt = "FastLevenbergMarquardt" NonlinearSolveLeastSquaresOptimExt = "LeastSquaresOptim" +NonlinearSolveMINPACKExt = "MINPACK" +NonlinearSolveNLsolveExt = "NLsolve" NonlinearSolveZygoteExt = "Zygote" [compat] @@ -60,19 +64,21 @@ LeastSquaresOptim = "0.8" LineSearches = "7" LinearAlgebra = "<0.0.1, 1" LinearSolve = "2.12" +MINPACK = "1.2" MaybeInplace = "0.1" +NLsolve = "4.5" NaNMath = "1" NonlinearProblemLibrary = "0.1" Pkg = "1" PrecompileTools = "1" Printf = "<0.0.1, 1" -Random = "1" +Random = "<0.0.1, 1" RecursiveArrayTools = "2" Reexport = "0.2, 1" SafeTestsets = "0.1" SciMLBase = "2.9" SciMLOperators = "0.3" -SimpleNonlinearSolve = "0.1.23" +SimpleNonlinearSolve = "1" SparseArrays = "<0.0.1, 1" SparseDiffTools = "2.14" StableRNGs = "1" @@ -94,17 +100,19 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LeastSquaresOptim = "0fc2ff8b-aaa3-5acd-a817-1944a5e08891" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9" +NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" NonlinearProblemLibrary = "b7050fa9-e91f-4b37-bcee-a89a063da141" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "Enzyme", "BenchmarkTools", "SafeTestsets", "Pkg", "Test", "ForwardDiff", "StaticArrays", "Symbolics", "LinearSolve", "Random", "LinearAlgebra", "Zygote", "SparseDiffTools", "NonlinearProblemLibrary", "LeastSquaresOptim", "FastLevenbergMarquardt", "NaNMath", "BandedMatrices", "DiffEqBase", "StableRNGs"] +test = ["Aqua", "Enzyme", "BenchmarkTools", "SafeTestsets", "Pkg", "Test", "ForwardDiff", "StaticArrays", "Symbolics", "LinearSolve", "Random", "LinearAlgebra", "Zygote", "SparseDiffTools", "NonlinearProblemLibrary", "LeastSquaresOptim", "FastLevenbergMarquardt", "NaNMath", "BandedMatrices", "DiffEqBase", "StableRNGs", "MINPACK", "NLsolve"] diff --git a/docs/Project.toml b/docs/Project.toml index 28416cb10..a78e698f2 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,9 +8,9 @@ IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" NonlinearSolve = 
"8913a72c-1f9b-4ce2-8d82-65094dcecaec" -NonlinearSolveMINPACK = "c100e077-885d-495a-a2ea-599e143bf69d" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -SciMLNLSolve = "e9a6253c-8580-4d32-9898-8661bb511710" SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SteadyStateDiffEq = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" @@ -26,12 +26,11 @@ Documenter = "1" IncompleteLU = "0.2" LinearSolve = "2" ModelingToolkit = "8" -NonlinearSolve = "1, 2" -NonlinearSolveMINPACK = "0.1" +NonlinearSolve = "3" +Random = "<0.0.1, 1" SciMLBase = "2.4" -SciMLNLSolve = "0.1" -SimpleNonlinearSolve = "0.1.5, 1" +SimpleNonlinearSolve = "1" StaticArrays = "1" -SteadyStateDiffEq = "1.10, 2" +SteadyStateDiffEq = "2" Sundials = "4.11" Symbolics = "4, 5" diff --git a/docs/make.jl b/docs/make.jl index 9a063bf47..959463b09 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,21 +1,22 @@ -using Documenter, NonlinearSolve, SimpleNonlinearSolve, Sundials, SciMLNLSolve, - NonlinearSolveMINPACK, SteadyStateDiffEq, SciMLBase, DiffEqBase +using Documenter, + NonlinearSolve, SimpleNonlinearSolve, Sundials, SteadyStateDiffEq, SciMLBase, DiffEqBase -cp("./docs/Manifest.toml", "./docs/src/assets/Manifest.toml", force = true) -cp("./docs/Project.toml", "./docs/src/assets/Project.toml", force = true) +cp(joinpath(@__DIR__, "Manifest.toml"), joinpath(@__DIR__, "src/assets/Manifest.toml"), + force = true) +cp(joinpath(@__DIR__, "Project.toml"), joinpath(@__DIR__, "src/assets/Project.toml"), + force = true) include("pages.jl") -makedocs(sitename = "NonlinearSolve.jl", +makedocs(; sitename = "NonlinearSolve.jl", authors = "Chris Rackauckas", modules = [NonlinearSolve, SciMLBase, DiffEqBase, SimpleNonlinearSolve, Sundials, - SciMLNLSolve, NonlinearSolveMINPACK, SteadyStateDiffEq], + SteadyStateDiffEq], clean = true, doctest = false, linkcheck = true, linkcheck_ignore = ["https://twitter.com/ChrisRackauckas/status/1544743542094020615"], warnonly = [:missing_docs, :cross_references], format = Documenter.HTML(assets = ["assets/favicon.ico"], canonical = "https://docs.sciml.ai/NonlinearSolve/stable/"), - pages = pages) + pages) -deploydocs(repo = "github.com/SciML/NonlinearSolve.jl.git"; - push_preview = true) +deploydocs(repo = "github.com/SciML/NonlinearSolve.jl.git"; push_preview = true) diff --git a/docs/pages.jl b/docs/pages.jl index bea06e3fa..e9999c82c 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -2,9 +2,9 @@ pages = ["index.md", "Getting Started with Nonlinear Rootfinding in Julia" => "tutorials/getting_started.md", - "Tutorials" => Any["Code Optimization for Small Nonlinear Systems" => "tutorials/code_optimization.md", - "Handling Large Ill-Conditioned and Sparse Systems" => "tutorials/large_systems.md", - "Symbolic System Definition and Acceleration via ModelingToolkit" => "tutorials/modelingtoolkit.md", + "Tutorials" => Any["tutorials/code_optimization.md", + "tutorials/large_systems.md", + "tutorials/modelingtoolkit.md", "tutorials/small_compile.md", "tutorials/iterator_interface.md"], "Basics" => Any["basics/NonlinearProblem.md", @@ -24,5 +24,8 @@ pages = ["index.md", "api/minpack.md", "api/nlsolve.md", "api/sundials.md", - "api/steadystatediffeq.md"], + "api/steadystatediffeq.md", + "api/leastsquaresoptim.md", + "api/fastlevenbergmarquardt.md"], + "Release Notes" => "release_notes.md", ] diff --git a/docs/src/api/fastlevenbergmarquardt.md 
b/docs/src/api/fastlevenbergmarquardt.md new file mode 100644 index 000000000..8709dc303 --- /dev/null +++ b/docs/src/api/fastlevenbergmarquardt.md @@ -0,0 +1,17 @@ +# FastLevenbergMarquardt.jl + +This is an extension for importing solvers from FastLevenbergMarquardt.jl into the SciML +interface. Note that these solvers do not come by default, and thus one needs to install +the package before using these solvers: + +```julia +using Pkg +Pkg.add("FastLevenbergMarquardt") +using FastLevenbergMarquardt, NonlinearSolve +``` + +## Solver API + +```@docs +FastLevenbergMarquardtJL +``` diff --git a/docs/src/api/leastsquaresoptim.md b/docs/src/api/leastsquaresoptim.md new file mode 100644 index 000000000..76850555b --- /dev/null +++ b/docs/src/api/leastsquaresoptim.md @@ -0,0 +1,17 @@ +# LeastSquaresOptim.jl + +This is an extension for importing solvers from LeastSquaresOptim.jl into the SciML +interface. Note that these solvers do not come by default, and thus one needs to install +the package before using these solvers: + +```julia +using Pkg +Pkg.add("LeastSquaresOptim") +using LeastSquaresOptim, NonlinearSolve +``` + +## Solver API + +```@docs +LeastSquaresOptimJL +``` diff --git a/docs/src/api/minpack.md b/docs/src/api/minpack.md index 5c1e80856..d55491c0e 100644 --- a/docs/src/api/minpack.md +++ b/docs/src/api/minpack.md @@ -1,17 +1,15 @@ # MINPACK.jl -This is a wrapper package for importing solvers from Sundials into the SciML interface. -Note that these solvers do not come by default, and thus one needs to install -the package before using these solvers: +This is an extension for importing solvers from MINPACK into the SciML interface. Note that +these solvers do not come by default, and thus one needs to install the package before using +these solvers: ```julia using Pkg -Pkg.add("NonlinearSolveMINPACK") -using NonlinearSolveMINPACK +Pkg.add("MINPACK") +using MINPACK, NonlinearSolve ``` -These methods can be used independently of the rest of NonlinearSolve.jl - ## Solver API ```@docs diff --git a/docs/src/api/nlsolve.md b/docs/src/api/nlsolve.md index 703a5174b..05db0937f 100644 --- a/docs/src/api/nlsolve.md +++ b/docs/src/api/nlsolve.md @@ -1,17 +1,17 @@ # NLsolve.jl -This is a wrapper package for importing solvers from NLsolve.jl into the SciML interface. -Note that these solvers do not come by default, and thus one needs to install -the package before using these solvers: +This is an extension for importing solvers from NLsolve.jl into the SciML interface. Note +that these solvers do not come by default, and thus one needs to install the package before +using these solvers: ```julia using Pkg -Pkg.add("SciMLNLSolve") -using SciMLNLSolve +Pkg.add("NLsolve") +using NLsolve, NonlinearSolve ``` ## Solver API ```@docs -NLSolveJL +NLsolveJL ``` diff --git a/docs/src/api/nonlinearsolve.md b/docs/src/api/nonlinearsolve.md index 3e6009017..0eebda2b4 100644 --- a/docs/src/api/nonlinearsolve.md +++ b/docs/src/api/nonlinearsolve.md @@ -9,8 +9,8 @@ NewtonRaphson TrustRegion PseudoTransient DFSane -GeneralBroyden -GeneralKlement +Broyden +Klement ``` ## Polyalgorithms diff --git a/docs/src/api/simplenonlinearsolve.md b/docs/src/api/simplenonlinearsolve.md index e71aacc1a..72f738140 100644 --- a/docs/src/api/simplenonlinearsolve.md +++ b/docs/src/api/simplenonlinearsolve.md @@ -6,7 +6,8 @@ These methods can be used independently of the rest of NonlinearSolve.jl ### Interval Methods -These methods are suited for interval (scalar) root-finding problems, i.e. `IntervalNonlinearProblem`. 
+These methods are suited for interval (scalar) root-finding problems, +i.e. `IntervalNonlinearProblem`. ```@docs ITP @@ -18,14 +19,15 @@ Brent ### General Methods -These methods are suited for any general nonlinear root-finding problem , i.e. `NonlinearProblem`. +These methods are suited for any general nonlinear root-finding problem, i.e. +`NonlinearProblem`. ```@docs SimpleNewtonRaphson -Broyden +SimpleBroyden SimpleHalley -Klement +SimpleKlement SimpleTrustRegion SimpleDFSane -LBroyden +SimpleLimitedMemoryBroyden ``` diff --git a/docs/src/api/steadystatediffeq.md b/docs/src/api/steadystatediffeq.md index 79499aec0..3bfe61c1a 100644 --- a/docs/src/api/steadystatediffeq.md +++ b/docs/src/api/steadystatediffeq.md @@ -1,8 +1,8 @@ # SteadyStateDiffEq.jl This is a wrapper package for using ODE solvers from -[DifferentialEquations.jl](https://docs.sciml.ai/DiffEqDocs/stable/) into the SciML interface. -Note that these solvers do not come by default, and thus one needs to install +[DifferentialEquations.jl](https://docs.sciml.ai/DiffEqDocs/stable/) into the SciML +interface. Note that these solvers do not come by default, and thus one needs to install the package before using these solvers: ```julia @@ -17,4 +17,5 @@ These methods can be used independently of the rest of NonlinearSolve.jl ```@docs DynamicSS +SSRootfind ``` diff --git a/docs/src/api/sundials.md b/docs/src/api/sundials.md index 551176329..80a6b367d 100644 --- a/docs/src/api/sundials.md +++ b/docs/src/api/sundials.md @@ -1,8 +1,8 @@ # Sundials.jl This is a wrapper package for importing solvers from Sundials into the SciML interface. -Note that these solvers do not come by default, and thus one needs to install -the package before using these solvers: +Note that these solvers do not come by default, and thus one needs to install the package +before using these solvers: ```julia using Pkg diff --git a/docs/src/basics/FAQ.md b/docs/src/basics/FAQ.md index 7e6fbd5b5..62add8e83 100644 --- a/docs/src/basics/FAQ.md +++ b/docs/src/basics/FAQ.md @@ -8,7 +8,7 @@ On the test example: ```@example using NonlinearSolve, BenchmarkTools -N = 100_000; +const N = 100_000; levels = 1.5 .* rand(N); out = zeros(N); myfun(x, lv) = x * sin(x) - lv @@ -31,8 +31,62 @@ end @btime f2(out, levels, 1.0) ``` -MATLAB 2022a achieves 1.66s. Try this code yourself: we receive 0.06 seconds, or a 28x speedup. -This example is still not optimized in the Julia code, and we expect an improvement in a near -future version. +MATLAB 2022a achieves 1.66s. Try this code yourself: we receive 0.009 seconds, or a 184x +speedup. For more information on the performance of SciML, see the [SciMLBenchmarks](https://docs.sciml.ai/SciMLBenchmarksOutput/stable/). + +## The solver tried to set a Dual Number in my Vector of Floats. How do I fix that? + +This is a common problem that occurs if the code was not written to be generic based on the +input types. 
For example, consider this example taken from +[this issue](https://github.com/SciML/NonlinearSolve.jl/issues/298): + +```@example dual_error_faq +using NonlinearSolve, Random + +function fff_incorrect(var, p) + v_true = [1.0, 0.1, 2.0, 0.5] + xx = [1.0, 2.0, 3.0, 4.0] + xx[1] = var[1] - v_true[1] + return var - v_true +end + +v_true = [1.0, 0.1, 2.0, 0.5] +v_init = v_true .+ randn!(similar(v_true)) * 0.1 + +prob_oop = NonlinearLeastSquaresProblem{false}(fff_incorrect, v_init) +try + sol = solve(prob_oop, LevenbergMarquardt(); maxiters = 10000, abstol = 1e-8) +catch e + @error e +end +``` + +Essentially, what happened was that NonlinearSolve checked that we can use ForwardDiff.jl to +differentiate the function based on the input types. However, this function has +`xx = [1.0, 2.0, 3.0, 4.0]` followed by `xx[1] = var[1] - v_true[1]`, where `var` might +be a Dual number. This causes the error. To fix it: + + 1. Specify the `autodiff` to be `AutoFiniteDiff`: +```@example dual_error_faq +sol = solve(prob_oop, LevenbergMarquardt(; autodiff = AutoFiniteDiff()); maxiters = 10000, + abstol = 1e-8) +``` + +This worked, but finite differencing is not the recommended approach in any scenario. +Instead, rewrite the function to use +[PreallocationTools.jl](https://github.com/SciML/PreallocationTools.jl) or write it as + +```@example dual_error_faq +function fff_correct(var, p) + v_true = [1.0, 0.1, 2.0, 0.5] + xx = eltype(var)[1.0, 2.0, 3.0, 4.0] + xx[1] = var[1] - v_true[1] + return xx - v_true +end + +prob_oop = NonlinearLeastSquaresProblem{false}(fff_correct, v_init) +sol = solve(prob_oop, LevenbergMarquardt(); maxiters = 10000, abstol = 1e-8) +``` diff --git a/docs/src/basics/NonlinearFunctions.md b/docs/src/basics/NonlinearFunctions.md index e679a1cb3..f3e142ac5 100644 --- a/docs/src/basics/NonlinearFunctions.md +++ b/docs/src/basics/NonlinearFunctions.md @@ -1,10 +1,10 @@ # [NonlinearFunctions and Jacobian Types](@id nonlinearfunctions) The SciML ecosystem provides an extensive interface for declaring extra functions -associated with the differential equation's data. In traditional libraries, there -is usually only one option: the Jacobian. However, we allow for a large array -of pre-computed functions to speed up the calculations. This is offered via the -`NonlinearFunction` types, which can be passed to the problems. +associated with the differential equation's data. In traditional libraries, there is usually +only one option: the Jacobian. However, we allow for a large array of pre-computed functions +to speed up the calculations. This is offered via the `NonlinearFunction` types, which can +be passed to the problems. ## Function Type Definitions diff --git a/docs/src/basics/NonlinearProblem.md b/docs/src/basics/NonlinearProblem.md index 11dc09772..23acf78b5 100644 --- a/docs/src/basics/NonlinearProblem.md +++ b/docs/src/basics/NonlinearProblem.md @@ -1,13 +1,17 @@ # [Nonlinear Problems](@id problems) -## The Three Types of Nonlinear Problems +## The Four Types of Nonlinear Problems -NonlinearSolve.jl tackles three related types of nonlinear systems: +NonlinearSolve.jl tackles four related types of nonlinear systems: - 1. Interval rootfinding problems. I.e., find the ``t \in [t_0, t_f]`` such that ``f(t) = 0``. + 1. Interval rootfinding problems. I.e., find the ``t \in [t_0, t_f]`` such that + ``f(t) = 0``. 2. Systems of nonlinear equations, i.e., find the ``u`` such that ``f(u) = 0``. 3. 
Steady state problems, i.e., find the ``u`` such that ``u' = f(u,t)`` has reached steady state, i.e., ``0 = f(u, ∞)``. + 4. The nonlinear least squares problem, which is an under/over-constrained nonlinear system + which might not be satisfiable, i.e. there may be no `u` such that `f(u) = 0`, and thus + we find the `u` which minimizes `||f(u)||` in the least squares sense. The first is for solving scalar rootfinding problems, i.e., finding a single number, and requires that a bracketing interval is known. For a bracketing interval, one must have that @@ -16,9 +20,9 @@ interval. !!! note - Interval rootfinding problems allow for `f` to return an array, in which case the interval - rootfinding problem is interpreted as finding the first `t` such that any of the components - of the array hit zero. + Interval rootfinding problems allow for `f` to return an array, in which case the + interval rootfinding problem is interpreted as finding the first `t` such that any of + the components of the array hit zero. The second type of nonlinear system can be multidimensional, and thus no ordering nor boundaries are assumed to be known. For a system of nonlinear equations, `f` can return @@ -43,4 +47,5 @@ ODE `u' = f(u,t)`. SciMLBase.IntervalNonlinearProblem SciMLBase.NonlinearProblem SciMLBase.SteadyStateProblem +SciMLBase.NonlinearLeastSquaresProblem ``` diff --git a/docs/src/basics/NonlinearSolution.md b/docs/src/basics/NonlinearSolution.md index da9886a3c..33cab954f 100644 --- a/docs/src/basics/NonlinearSolution.md +++ b/docs/src/basics/NonlinearSolution.md @@ -3,3 +3,14 @@ ```@docs SciMLBase.NonlinearSolution ``` + +## Return Code + + - `ReturnCode.Success` - The nonlinear solve succeeded. + - `ReturnCode.ConvergenceFailure` - The nonlinear solve failed to converge due to stalling + or some limit of the solver was exceeded. For example, too many shrinks for trust + region methods, number of resets for Broyden, etc. + - `ReturnCode.Unstable` - This corresponds to + `NonlinearSafeTerminationReturnCode.ProtectiveTermination` and is caused if the step-size + of the solver was too large or the objective value became non-finite. + - `ReturnCode.MaxIters` - The maximum number of iterations was reached. diff --git a/docs/src/basics/TerminationCondition.md b/docs/src/basics/TerminationCondition.md index a4ff5272b..5351198ca 100644 --- a/docs/src/basics/TerminationCondition.md +++ b/docs/src/basics/TerminationCondition.md @@ -63,14 +63,3 @@ DiffEqBase.NonlinearSafeTerminationReturnCode.Failure DiffEqBase.NonlinearSafeTerminationReturnCode.PatienceTermination DiffEqBase.NonlinearSafeTerminationReturnCode.ProtectiveTermination ``` - -## [Deprecated] Termination Condition API - -!!! warning - - This is deprecated. Currently only parts of `SimpleNonlinearSolve` uses this API. That - will also be phased out soon! - -```@docs -NLSolveTerminationCondition -``` diff --git a/docs/src/basics/solve.md b/docs/src/basics/solve.md index 8e33ed933..0f205aba1 100644 --- a/docs/src/basics/solve.md +++ b/docs/src/basics/solve.md @@ -3,3 +3,31 @@ ```@docs solve(prob::SciMLBase.NonlinearProblem, args...; kwargs...) ``` + +## General Controls + + - `alias_u0::Bool`: Whether to alias the initial condition or use a copy. + Defaults to `false`. + - `internal_norm::Function`: The norm used by the solver. Default depends on algorithm + choice. + +## Iteration Controls + + - `maxiters::Int`: The maximum number of iterations to perform. Defaults to `1000`. + - `abstol::Number`: The absolute tolerance. 
+ - `reltol::Number`: The relative tolerance. + - `termination_condition`: Termination Condition from DiffEqBase. Defaults to + `AbsSafeBestTerminationMode()` for `NonlinearSolve.jl` and `AbsTerminateMode()` for + `SimpleNonlinearSolve.jl`. + +## Tracing Controls + +These are exclusively available for native `NonlinearSolve.jl` solvers. + + - `show_trace`: Must be `Val(true)` or `Val(false)`. This controls whether the trace is + displayed to the console. (Defaults to `Val(false)`) + - `trace_level`: Needs to be one of Trace Objects: [`TraceMinimal`](@ref), + [`TraceWithJacobianConditionNumber`](@ref), or [`TraceAll`](@ref). This controls the + level of detail of the trace. (Defaults to `TraceMinimal()`) + - `store_trace`: Must be `Val(true)` or `Val(false)`. This controls whether the trace is + stored in the solution object. (Defaults to `Val(false)`) diff --git a/docs/src/index.md b/docs/src/index.md index 3d7e97890..b35f09f3a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,20 +1,19 @@ # NonlinearSolve.jl: High-Performance Unified Nonlinear Solvers -NonlinearSolve.jl is a unified interface for the nonlinear solving packages of -Julia. The package includes its own high-performance nonlinear solvers which include the -ability to swap out to fast direct and iterative linear solvers, along with the -ability to use sparse automatic differentiation for Jacobian construction and -Jacobian-vector products. NonlinearSolve.jl interfaces with other packages of the Julia ecosystem -to make it easy to test alternative solver packages and pass small types to -control algorithm swapping. It also interfaces with the -[ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/stable/) world of symbolic -modeling to allow for automatically generating high-performance code. - -Performance is key: the current methods are made to be highly performant on -scalar and statically sized small problems, with options for large-scale systems. -If you run into any performance issues, please file an issue. -Consult the [NonlinearSystemSolvers](@ref nonlinearsystemsolvers) page for -information on how to import solvers from different packages. +NonlinearSolve.jl is a unified interface for the nonlinear solving packages of Julia. The +package includes its own high-performance nonlinear solvers which include the ability to +swap out to fast direct and iterative linear solvers, along with the ability to use sparse +automatic differentiation for Jacobian construction and Jacobian-vector products. +NonlinearSolve.jl interfaces with other packages of the Julia ecosystem to make it easy to +test alternative solver packages and pass small types to control algorithm swapping. It also +interfaces with the [ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/stable/) +world of symbolic modeling to allow for automatically generating high-performance code. + +Performance is key: the current methods are made to be highly performant on scalar and +statically sized small problems, with options for large-scale systems. If you run into any +performance issues, please file an issue. Consult the +[NonlinearSystemSolvers](@ref nonlinearsystemsolvers) page for information on how to import +solvers from different packages. ## Installation @@ -34,10 +33,8 @@ Pkg.add("NonlinearSolve") - See the [SciML Style Guide](https://github.com/SciML/SciMLStyle) for common coding practices and other style decisions. 
- There are a few community forums: - + The #diffeq-bridged and #sciml-bridged channels in the - [Julia Slack](https://julialang.org/slack/) - + The #diffeq-bridged and #sciml-bridged channels in the - [Julia Zulip](https://julialang.zulipchat.com/#narrow/stream/279055-sciml-bridged) + + The #diffeq-bridged and #sciml-bridged channels in the [Julia Slack](https://julialang.org/slack/) + + The #diffeq-bridged and #sciml-bridged channels in the [Julia Zulip](https://julialang.zulipchat.com/#narrow/stream/279055-sciml-bridged) + On the [Julia Discourse forums](https://discourse.julialang.org) + See also [SciML Community page](https://sciml.ai/community/) diff --git a/docs/src/release_notes.md b/docs/src/release_notes.md new file mode 100644 index 000000000..9d517267e --- /dev/null +++ b/docs/src/release_notes.md @@ -0,0 +1,22 @@ +# Release Notes + +## Breaking Changes in `NonlinearSolve.jl` v3 + + - `GeneralBroyden` and `GeneralKlement` have been renamed to `Broyden` and `Klement` + respectively. + - Compat for `SimpleNonlinearSolve` has been bumped to `v1`. + - The old style of specifying autodiff with `chunksize`, `standardtag`, etc. has been + deprecated in favor of directly specifying the autodiff type, like `AutoForwardDiff`. + +## Breaking Changes in `SimpleNonlinearSolve.jl` v1 + + - Batched solvers have been removed in favor of `BatchedArrays.jl`. Stay tuned for detailed + tutorials on how to use `BatchedArrays.jl` with `NonlinearSolve` & `SimpleNonlinearSolve` + solvers. + - The old style of specifying autodiff with `chunksize`, `standardtag`, etc. has been + deprecated in favor of directly specifying the autodiff type, like `AutoForwardDiff`. + - `Broyden` and `Klement` have been renamed to `SimpleBroyden` and `SimpleKlement` to + avoid conflicts with `NonlinearSolve.jl`'s `GeneralBroyden` and `GeneralKlement`, which + will be renamed to `Broyden` and `Klement` in the future. + - `LBroyden` has been renamed to `SimpleLimitedMemoryBroyden` to make it consistent with + `NonlinearSolve.jl`'s `LimitedMemoryBroyden`. diff --git a/docs/src/solvers/BracketingSolvers.md b/docs/src/solvers/BracketingSolvers.md index 37184fbfe..af322af74 100644 --- a/docs/src/solvers/BracketingSolvers.md +++ b/docs/src/solvers/BracketingSolvers.md @@ -1,15 +1,25 @@ # [Interval Rootfinding Methods (Bracketing Solvers)](@id bracketing) -`solve(prob::IntervalNonlinearProblem,alg;kwargs)` +`solve(prob::IntervalNonlinearProblem, alg; kwargs...)` -Solves for ``f(t)=0`` in the problem defined by `prob` using the algorithm -`alg`. If no algorithm is given, a default algorithm will be chosen. +Solves for ``f(t) = 0`` in the problem defined by `prob` using the algorithm `alg`. If no +algorithm is given, a default algorithm will be chosen. ## Recommended Methods -`ITP()` is the recommended method for the scalar interval root-finding problems. It is particularly well-suited for cases where the function is smooth and well-behaved; and achieved superlinear convergence while retaining the optimal worst-case performance of the Bisection method. For more details, consult the detailed solver API docs. -`Ridder` is a hybrid method that uses the value of function at the midpoint of the interval to perform an exponential interpolation to the root. This gives a fast convergence with a guaranteed convergence of at most twice the number of iterations as the bisection method. -`Brent` is a combination of the bisection method, the secant method and inverse quadratic interpolation. 
At every iteration, Brent's method decides which method out of these three is likely to do best, and proceeds by doing a step according to that method. This gives a robust and fast method, which therefore enjoys considerable popularity. +`ITP()` is the recommended method for scalar interval root-finding problems. It is +particularly well-suited for cases where the function is smooth and well-behaved, and +achieves superlinear convergence while retaining the optimal worst-case performance of the +Bisection method. For more details, consult the detailed solver API docs. + +`Ridder` is a hybrid method that uses the value of the function at the midpoint of the +interval to perform an exponential interpolation to the root. This gives fast convergence, +with convergence guaranteed in at most twice the number of iterations of the bisection +method. + +`Brent` is a combination of the bisection method, the secant method and inverse quadratic +interpolation. At every iteration, Brent's method decides which method out of these three is +likely to do best, and proceeds by doing a step according to that method. This gives a +robust and fast method, which therefore enjoys considerable popularity. ## Full List of Methods diff --git a/docs/src/solvers/NonlinearLeastSquaresSolvers.md b/docs/src/solvers/NonlinearLeastSquaresSolvers.md index 1690671d9..7adfd9508 100644 --- a/docs/src/solvers/NonlinearLeastSquaresSolvers.md +++ b/docs/src/solvers/NonlinearLeastSquaresSolvers.md @@ -7,20 +7,75 @@ Solves the nonlinear least squares problem defined by `prob` using the algorithm ## Recommended Methods -The default method `FastShortcutNLLSPolyalg` is a good choice for most -problems. It is a polyalgorithm that attempts to use a fast algorithm -(`GaussNewton`) and if that fails it falls back to a more robust -algorithm (`LevenbergMarquardt`). +The default method `FastShortcutNLLSPolyalg` is a good choice for most problems. It is a +polyalgorithm that attempts to use a fast algorithm (`GaussNewton`) and if that fails it +falls back to a more robust algorithm (`LevenbergMarquardt`). ## Full List of Methods +### NonlinearSolve.jl + - `LevenbergMarquardt()`: An advanced Levenberg-Marquardt implementation with the improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to - the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for - large-scale and numerically-difficult nonlinear systems. + the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed + for large-scale and numerically-difficult nonlinear systems. - `GaussNewton()`: An advanced GaussNewton implementation with support for efficient handling of sparse matrices via colored automatic differentiation and preconditioned - linear solvers. Designed for large-scale and numerically-difficult nonlinear least squares - problems. - - `SimpleNewtonRaphson()`: Simple Gauss Newton Implementation with `QRFactorization` to - solve a linear least squares problem at each step! + linear solvers. Designed for large-scale and numerically-difficult nonlinear least + squares problems. + +### SimpleNonlinearSolve.jl + +These methods are included with NonlinearSolve.jl by default, though SimpleNonlinearSolve.jl +can be used directly to reduce dependencies and improve load times. +SimpleNonlinearSolve.jl's methods excel at small problems and problems defined with static +arrays. + + - `SimpleGaussNewton()`: Simple Gauss Newton implementation using QR factorizations for + numerical stability. 
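+
+As a quick illustration of the interface (a minimal sketch: the residual function, initial
+guess, and parameter below are made up purely for demonstration), note that a
+`NonlinearLeastSquaresProblem` allows the residual size to differ from the input size:
+
+```julia
+using NonlinearSolve # re-exports the SimpleNonlinearSolve solvers
+
+# Out-of-place residual: 3 equations in 2 unknowns (overdetermined).
+f(u, p) = [u[1] - p, u[2] - p, u[1] * u[2] - p]
+prob = NonlinearLeastSquaresProblem(f, [1.0, 1.0], 2.0)
+sol = solve(prob, SimpleGaussNewton())
+```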
+ +### FastLevenbergMarquardt.jl + +A wrapper over +[FastLevenbergMarquardt.jl](https://github.com/kamesy/FastLevenbergMarquardt.jl). Note that +it is called `FastLevenbergMarquardt` since the original package is called "Fast", though +benchmarks demonstrate that `LevenbergMarquardt()` usually outperforms it. + + - `FastLevenbergMarquardtJL(linsolve = :cholesky)`, can also choose `linsolve = :qr`. + +### LeastSquaresOptim.jl + +A wrapper over +[LeastSquaresOptim.jl](https://github.com/matthieugomez/LeastSquaresOptim.jl). Has a core +algorithm `LeastSquaresOptimJL(alg; linsolve)` where the choices for `alg` are: + + - `:lm`: a Levenberg-Marquardt implementation + - `:dogleg`: a trust-region dogleg Gauss-Newton + +And the choices for `linsolve` are: + + - `:qr` + - `:cholesky` + - `:lsmr`: a conjugate gradient method (LSMR with diagonal preconditioner). + +### MINPACK.jl + +MINPACK.jl methods are fine for medium-sized nonlinear solves. They are the FORTRAN +standard methods which are used in many places, such as SciPy. However, our benchmarks +demonstrate that these methods are not robust or stable. In addition, they are slower +than the standard methods and do not scale due to lack of sparse Jacobian support. +Thus they are only recommended for benchmarking and testing code conversions. + + - `CMINPACK()`: A wrapper for using the classic MINPACK method through [MINPACK.jl](https://github.com/sglyon/MINPACK.jl). + +Submethod choices for this algorithm include: + + - `:hybr`: Modified version of Powell's algorithm. + - `:lm`: Levenberg-Marquardt. + - `:lmdif`: Advanced Levenberg-Marquardt. + - `:hybrd`: Advanced modified version of Powell's algorithm. + +### Optimization.jl + +`NonlinearLeastSquaresProblem`s can be converted into an `OptimizationProblem` and used +with any solver of [Optimization.jl](https://github.com/SciML/Optimization.jl). diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md index 31e820083..776e58b9c 100644 --- a/docs/src/solvers/NonlinearSystemSolvers.md +++ b/docs/src/solvers/NonlinearSystemSolvers.md @@ -7,31 +7,27 @@ Solves for ``f(u)=0`` in the problem defined by `prob` using the algorithm ## Recommended Methods -The default method `FastShortcutNonlinearPolyalg` is a good choice for most -problems. It is a polyalgorithm that attempts to use a fast algorithm -(Klement, Broyden) and if that fails it falls back to a more robust -algorithm (`NewtonRaphson`) before falling back the most robust variant of -`TrustRegion`. For basic problems this will be very fast, for harder problems -it will make sure to work. -If one is looking for more robustness then `RobustMultiNewton` is a good choice. -It attempts a set of the most robust methods in succession and only fails if -all of the methods fail to converge. Additionally, `DynamicSS` can be a good choice -for high stability. -As a balance, `NewtonRaphson` is a good choice for most problems that aren't too -difficult yet need high performance, and `TrustRegion` is a bit less performant -but more stable. If the problem is well-conditioned, `Klement` or `Broyden` -may be faster, but highly dependent on the eigenvalues of the Jacobian being -sufficiently small. -`NewtonRaphson` and `TrustRegion` are designed for for large systems. -They can make use of sparsity patterns for sparse automatic differentiation -and sparse linear solving of very large systems. Meanwhile, -`SimpleNewtonRaphson` and `SimpleTrustRegion` are implementations which is specialized for -small equations. 
They are non-allocating on static arrays and thus really well-optimized -for small systems, thus usually outperforming the other methods when such types are -used for `u0`. +The default method `FastShortcutNonlinearPolyalg` is a good choice for most problems. It is +a polyalgorithm that attempts to use a fast algorithm (Klement, Broyden) and if that fails +it falls back to a more robust algorithm (`NewtonRaphson`) before falling back to the most +robust variant of `TrustRegion`. For basic problems this will be very fast, for harder +problems it will make sure to work. + +If one is looking for more robustness, then `RobustMultiNewton` is a good choice. It attempts +a set of the most robust methods in succession and only fails if all of the methods fail to +converge. Additionally, `DynamicSS` can be a good choice for high stability. + +As a balance, `NewtonRaphson` is a good choice for most problems that aren't too difficult +yet need high performance, and `TrustRegion` is a bit less performant but more stable. If +the problem is well-conditioned, `Klement` or `Broyden` may be faster, but highly dependent +on the eigenvalues of the Jacobian being sufficiently small. + +`NewtonRaphson` and `TrustRegion` are designed for large systems. They can make use of +sparsity patterns for sparse automatic differentiation and sparse linear solving of very +large systems. Meanwhile, `SimpleNewtonRaphson` and `SimpleTrustRegion` are implementations +which are specialized for small equations. They are non-allocating on static arrays and thus +really well-optimized for small systems, usually outperforming the other methods when +such types are used for `u0`. ## Full List of Methods @@ -65,16 +61,16 @@ features, but have a bit of overhead on very small problems. - `FastShortcutNonlinearPolyalg()`: The default method. A polyalgorithm that mixes fast methods with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing robustness on the hard problems. - - `GeneralBroyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and + - `Broyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of the Jacobian matrix is sufficiently large. - - `GeneralKlement()`: Generalization of Klement's Quasi-Newton Method with Line Search and + - `Klement()`: Generalization of Klement's Quasi-Newton Method with Line Search and Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of the Jacobian matrix is sufficiently large. - `LimitedMemoryBroyden()`: An advanced version of `LBroyden` which uses a limited memory Broyden method. This is a fast method but unstable when the condition number of - the Jacobian matrix is sufficiently large. It is recommended to use `GeneralBroyden` or - `GeneralKlement` instead unless the memory usage is a concern. + the Jacobian matrix is sufficiently large. It is recommended to use `Broyden` or + `Klement` instead unless the memory usage is a concern. ### SimpleNonlinearSolve.jl @@ -83,11 +79,11 @@ can be used directly to reduce dependencies and improve load times. SimpleNonlin methods excel at small problems and problems defined with static arrays. - `SimpleNewtonRaphson()`: A simplified implementation of the Newton-Raphson method. - - `Broyden()`: The classic Broyden's quasi-Newton method. - - `LBroyden()`: A low-memory Broyden implementation, similar to L-BFGS. 
This method is + - `SimpleBroyden()`: The classic Broyden's quasi-Newton method. + - `SimpleLimitedMemoryBroyden()`: A low-memory Broyden implementation, similar to L-BFGS. This method is common in machine learning contexts but is known to be unstable in comparison to many other choices. - - `Klement()`: A quasi-Newton method due to Klement. It's supposed to be more efficient + - `SimpleKlement()`: A quasi-Newton method due to Klement. It's supposed to be more efficient than Broyden's method, and it seems to be in the cases that have been tried, but more benchmarking is required. - `SimpleTrustRegion()`: A dogleg trust-region Newton method. Improved globalizing stability @@ -110,14 +106,15 @@ SteadyStateDiffEq.jl uses ODE solvers to iteratively approach the steady state. very stable method for solving nonlinear systems, though often more computationally expensive than direct methods. - - `DynamicSS()` : Uses an ODE solver to find the steady state. Automatically - terminates when close to the steady state. + - `DynamicSS()`: Uses an ODE solver to find the steady state. Automatically terminates when + close to the steady state. + - `SSRootfind()`: Uses a NonlinearSolve compatible solver to find the steady state. -### SciMLNLSolve.jl +### NLsolve.jl This is a wrapper package for importing solvers from NLsolve.jl into the SciML interface. - - `NLSolveJL()`: A wrapper for [NLsolve.jl](https://github.com/JuliaNLSolvers/NLsolve.jl) + - `NLsolveJL()`: A wrapper for [NLsolve.jl](https://github.com/JuliaNLSolvers/NLsolve.jl) Submethod choices for this algorithm include: diff --git a/docs/src/solvers/SteadyStateSolvers.md b/docs/src/solvers/SteadyStateSolvers.md index af7b2bc06..b1a57a2c0 100644 --- a/docs/src/solvers/SteadyStateSolvers.md +++ b/docs/src/solvers/SteadyStateSolvers.md @@ -9,9 +9,9 @@ Solves for the steady states in the problem defined by `prob` using the algorith Conversion to a NonlinearProblem is generally the fastest method. However, this will not guarantee the preferred root, and thus if the preferred root is required, then it's -recommended that one uses `DynamicSS`. For `DynamicSS`, often an adaptive stiff -solver, like a Rosenbrock or BDF method (`Rodas5` or `QNDF`), is a good way to allow for -very large time steps as the steady state approaches. +recommended that one uses `DynamicSS`. For `DynamicSS`, often an adaptive stiff solver, +like a Rosenbrock or BDF method (`Rodas5` or `QNDF`), is a good way to allow for very large +time steps as the steady state approaches. !!! note @@ -28,7 +28,11 @@ very large time steps as the steady state approaches. Any `SteadyStateProblem` can be trivially converted to a `NonlinearProblem` via `NonlinearProblem(prob::SteadyStateProblem)`. Using this approach, any of the solvers from -the [Nonlinear System Solvers page](@ref nonlinearsystemsolvers) can be used. +the [Nonlinear System Solvers page](@ref nonlinearsystemsolvers) can be used. As a +convenience, users can use: + + - `SSRootfind`: A wrapper around `NonlinearSolve.jl` compliant solvers which converts + the `SteadyStateProblem` to a `NonlinearProblem` and solves it. ### SteadyStateDiffEq.jl @@ -36,16 +40,13 @@ SteadyStateDiffEq.jl uses ODE solvers to iteratively approach the steady state. very stable method for solving nonlinear systems, though often computationally more expensive than direct methods. - - `DynamicSS` : Uses an ODE solver to find the steady state. Automatically - terminates when close to the steady state. 
- `DynamicSS(alg;abstol=1e-8,reltol=1e-6,tspan=Inf)` requires that an - ODE algorithm is given as the first argument. The absolute and - relative tolerances specify the termination conditions on the - derivative's closeness to zero. This internally uses the - `TerminateSteadyState` callback from the Callback Library. The - simulated time, for which the ODE is solved, can be limited by - `tspan`. If `tspan` is a number, it is equivalent to passing - `(zero(tspan), tspan)`. + - `DynamicSS`: Uses an ODE solver to find the steady state. Automatically terminates + when close to the steady state. `DynamicSS(alg; tspan=Inf)` requires that an ODE + algorithm is given as the first argument. The absolute and relative tolerances specify + the termination conditions on the derivative's closeness to zero. This internally + uses the `TerminateSteadyState` callback from the Callback Library. The simulated time, + for which the ODE is solved, can be limited by `tspan`. If `tspan` is a number, it is + equivalent to passing `(zero(tspan), tspan)`. Example usage: @@ -59,4 +60,4 @@ sol = solve(prob, DynamicSS(CVODE_BDF()), dt = 1.0) !!! note - If you use `CVODE_BDF` you may need to give a starting `dt` via `dt=....`.* + If you use `CVODE_BDF` you may need to give a starting `dt` via `dt=....`. diff --git a/docs/src/tutorials/code_optimization.md b/docs/src/tutorials/code_optimization.md index cd4089d5d..1bfc1c302 100644 --- a/docs/src/tutorials/code_optimization.md +++ b/docs/src/tutorials/code_optimization.md @@ -2,19 +2,18 @@ ## General Code Optimization in Julia -Before starting this tutorial, we recommend the reader to check out one of the -many tutorials for optimization Julia code. The following is an incomplete -list: +Before starting this tutorial, we recommend that the reader check out one of the many +tutorials for optimizing Julia code. The following is an incomplete list: - [The Julia Performance Tips](https://docs.julialang.org/en/v1/manual/performance-tips/) - [MIT 18.337 Course Notes on Optimizing Serial Code](https://mitmath.github.io/18337/lecture2/optimizing) - [What scientists must know about hardware to write fast code](https://viralinstruction.com/posts/hardware/) -User-side optimizations are important because, for sufficiently difficult problems, -most time will be spent inside your `f` function, the function you are -trying to solve. “Efficient solvers" are those that reduce the required -number of `f` calls to hit the error tolerance. The main ideas for optimizing -your nonlinear solver code, or any Julia function, are the following: +User-side optimizations are important because, for sufficiently difficult problems, most +time will be spent inside your `f` function, the function you are trying to solve. +“Efficient solvers” are those that reduce the required number of `f` calls to hit the error +tolerance. The main ideas for optimizing your nonlinear solver code, or any Julia function, +are the following: - Make it non-allocating - Use StaticArrays for small arrays - Use special linear solvers - For large systems, make use of sparsity - Make use of BLAS calls - Optimize algorithm choice -We'll discuss these strategies in the context of nonlinear solvers. -Let's start with small systems. +We'll discuss these strategies in the context of nonlinear solvers. Let's start with small +systems. 
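+
+One quick way to verify the first point (non-allocation) is to benchmark your residual
+function directly (a minimal sketch: `f!`, `du`, `u`, and `p` here are placeholders for
+your own in-place function and data, not names from this tutorial):
+
+```julia
+using BenchmarkTools
+
+# Zero reported allocations means the residual itself is allocation-free.
+@btime f!($du, $u, $p)
+```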
## Optimizing Nonlinear Solver Code for Small Systems @@ -48,7 +47,7 @@ We can use BenchmarkTools.jl to get more precise timings: ```@example small_opt using BenchmarkTools -@btime solve(prob, NewtonRaphson()) +@benchmark solve(prob, NewtonRaphson()) ``` Note that this way of writing the function is a shorthand for: @@ -59,14 +58,14 @@ function f(u, p) end ``` -where the function `f` returns an array. This is a common pattern from things like MATLAB's `fzero` -or SciPy's `scipy.optimize.fsolve`. However, by design it's very slow. I nthe benchmark you can see -that there are many allocations. These allocations cause the memory allocator to take more time -than the actual numerics itself, which is one of the reasons why codes from MATLAB and Python end up -slow. +where the function `f` returns an array. This is a common pattern from things like MATLAB's +`fzero` or SciPy's `scipy.optimize.fsolve`. However, by design it's very slow. In the +benchmark you can see that there are many allocations. These allocations cause the memory +allocator to take more time than the actual numerics itself, which is one of the reasons why +codes from MATLAB and Python end up slow. -In order to avoid this issue, we can use a non-allocating "in-place" approach. Written out by hand, -this looks like: +In order to avoid this issue, we can use a non-allocating "in-place" approach. Written out +by hand, this looks like: ```@example small_opt function f(du, u, p) @@ -76,7 +75,7 @@ function f(du, u, p) end prob = NonlinearProblem(f, u0, p) -@btime sol = solve(prob, NewtonRaphson()) +@benchmark sol = solve(prob, NewtonRaphson()) ``` Notice how much faster this already runs! We can make this code even simpler by using @@ -87,33 +86,31 @@ function f(du, u, p) du .= u .* u .- p end -@btime sol = solve(prob, NewtonRaphson()) +@benchmark sol = solve(prob, NewtonRaphson()) ``` ## Further Optimizations for Small Nonlinear Solves with Static Arrays and SimpleNonlinearSolve -Allocations are only expensive if they are “heap allocations”. For a more -in-depth definition of heap allocations, +Allocations are only expensive if they are “heap allocations”. For a more in-depth +definition of heap allocations, [there are many sources online](http://net-informations.com/faq/net/stack-heap.htm). -But a good working definition is that heap allocations are variable-sized slabs -of memory which have to be pointed to, and this pointer indirection costs time. -Additionally, the heap has to be managed, and the garbage controllers has to -actively keep track of what's on the heap. - -However, there's an alternative to heap allocations, known as stack allocations. -The stack is statically-sized (known at compile time) and thus its accesses are -quick. Additionally, the exact block of memory is known in advance by the -compiler, and thus re-using the memory is cheap. This means that allocating on -the stack has essentially no cost! - -Arrays have to be heap allocated because their size (and thus the amount of -memory they take up) is determined at runtime. But there are structures in -Julia which are stack-allocated. `struct`s for example are stack-allocated -“value-type”s. `Tuple`s are a stack-allocated collection. The most useful data -structure for NonlinearSolve though is the `StaticArray` from the package -[StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl). These arrays -have their length determined at compile-time. 
They are created using macros -attached to normal array expressions, for example: +But a good working definition is that heap allocations are variable-sized slabs of memory +which have to be pointed to, and this pointer indirection costs time. Additionally, the heap +has to be managed, and the garbage collector has to actively keep track of what's on the +heap. + +However, there's an alternative to heap allocations, known as stack allocations. The stack +is statically-sized (known at compile time) and thus its accesses are quick. Additionally, +the exact block of memory is known in advance by the compiler, and thus re-using the memory +is cheap. This means that allocating on the stack has essentially no cost! + +Arrays have to be heap allocated because their size (and thus the amount of memory they take +up) is determined at runtime. But there are structures in Julia which are stack-allocated. +`struct`s for example are stack-allocated “value-type”s. `Tuple`s are a stack-allocated +collection. The most useful data structure for NonlinearSolve though is the `StaticArray` +from the package [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl). These +arrays have their length determined at compile-time. They are created using macros attached +to normal array expressions, for example: ```@example small_opt using StaticArrays A = SA[2.0, 3.0, 5.0] typeof(A) # SVector{3, Float64} (alias for SArray{Tuple{3}, Float64, 1, 3}) ``` -Notice that the `3` after `SVector` gives the size of the `SVector`. It cannot -be changed. Additionally, `SVector`s are immutable, so we have to create a new -`SVector` to change values. But remember, we don't have to worry about -allocations because this data structure is stack-allocated. `SArray`s have -numerous extra optimizations as well: they have fast matrix multiplication, -fast QR factorizations, etc. which directly make use of the information about -the size of the array. Thus, when possible, they should be used. +Notice that the `3` after `SVector` gives the size of the `SVector`. It cannot be changed. +Additionally, `SVector`s are immutable, so we have to create a new `SVector` to change +values. But remember, we don't have to worry about allocations because this data structure +is stack-allocated. `SArray`s have numerous extra optimizations as well: they have fast +matrix multiplication, fast QR factorizations, etc. which directly make use of the +information about the size of the array. Thus, when possible, they should be used. -Unfortunately, static arrays can only be used for sufficiently small arrays. -After a certain size, they are forced to heap allocate after some instructions -and their compile time balloons. Thus, static arrays shouldn't be used if your -system has more than ~20 variables. Additionally, only the native Julia -algorithms can fully utilize static arrays. +Unfortunately, static arrays can only be used for sufficiently small arrays. After a certain +size, they are forced to heap allocate after some instructions and their compile time +balloons. Thus, static arrays shouldn't be used if your system has more than ~20 variables. +Additionally, only the native Julia algorithms can fully utilize static arrays. Let's ***optimize our nonlinear solve using static arrays***. Note that in this case, we -want to use the out-of-place allocating form, but this time we want to output -a static array. 
Doing it with broadcasting looks like: +want to use the out-of-place allocating form, but this time we want to output a static +array. Doing it with broadcasting looks like: ```@example small_opt function f_SA(u, p) u .* u .- p end u0 = SA[1.0, 1.0] p = 2.0 prob = NonlinearProblem(f_SA, u0, p) -@btime solve(prob, NewtonRaphson()) +@benchmark solve(prob, NewtonRaphson()) ``` -Note that only change here is that `u0` is made into a StaticArray! If we needed to write `f` out -for a more complex nonlinear case, then we'd simply do the following: +Note that the only change here is that `u0` is made into a StaticArray! If we needed to write +`f` out for a more complex nonlinear case, then we'd simply do the following: ```@example small_opt function f_SA(u, p) SA[u[1] * u[1] - p, u[2] * u[2] - p] end -@btime solve(prob, NewtonRaphson()) +@benchmark solve(prob, NewtonRaphson()) ``` -However, notice that this did not give us a speedup but rather a slowdown. This is because many of the -methods in NonlinearSolve.jl are designed to scale to larger problems, and thus aggressively do things -like caching which is good for large problems but not good for these smaller problems and static arrays. -In order to see the full benefit, we need to move to one of the methods from SimpleNonlinearSolve.jl -which are designed for these small-scale static examples. Let's now use `SimpleNewtonRaphson`: +However, notice that this did not give us a speedup but rather a slowdown. This is because +many of the methods in NonlinearSolve.jl are designed to scale to larger problems, and thus +aggressively do things like caching which is good for large problems but not good for these +smaller problems and static arrays. In order to see the full benefit, we need to move to one +of the methods from SimpleNonlinearSolve.jl which are designed for these small-scale static +examples. Let's now use `SimpleNewtonRaphson`: ```@example small_opt -@btime solve(prob, SimpleNewtonRaphson()) +@benchmark solve(prob, SimpleNewtonRaphson()) ``` -And there we go, around 100ns from our starting point of almost 6μs! +And there we go, around `40ns` from our starting point of almost `4μs`! diff --git a/docs/src/tutorials/getting_started.md b/docs/src/tutorials/getting_started.md index a5cc8786e..0078aaa16 100644 --- a/docs/src/tutorials/getting_started.md +++ b/docs/src/tutorials/getting_started.md @@ -1,36 +1,34 @@ # Getting Started with Nonlinear Rootfinding in Julia -NonlinearSolve.jl is a system for solving rootfinding problems, i.e. finding -the value $$u$$ such that $$f(u) = 0$$. In this tutorial we will go through -the basics of NonlinearSolve.jl, demonstrating the core ideas and leading you -to understanding the deeper parts of the documentation. - -## The Three Types of Nonlinear Systems - -There are three types of nonlinear systems: - - 1. The "standard nonlinear system", i.e. the `NonlinearProblem`. This is a - system of equations with an initial condition where you want to satisfy - all equations simultaneously. - 2. The "interval rootfinding problem", i.e. the `IntervalNonlinearProblem`. - This is the case where you're given an interval `[a,b]` and need to find - where `f(u) = 0` for `u` inside the bounds. - 3. The "steady state problem", i.e. find the `u` such that `u' = f(u) = 0`. - While related to (1), it's not entirely the same because there's a uniquely - defined privileged root. - 4. The nonlinear least squares problem, which is an overconstrained nonlinear - system (i.e. 
more equations than states) which might not be satisfiable, i.e. - there may be no `u` such that `f(u) = 0`, and thus we find the `u` which - minimizes `||f(u)||` in the least squares sense. - -For now let's focus on the first two. The other two are covered in later tutorials, -but from the first two we can show the general flow of the NonlinearSolve.jl package. +NonlinearSolve.jl is a system for solving rootfinding problems, i.e. finding the value $$u$$ +such that $$f(u) = 0$$. In this tutorial we will go through the basics of NonlinearSolve.jl, +demonstrating the core ideas and leading you to understanding the deeper parts of the +documentation. + +## The Four Types of Nonlinear Systems + +There are four types of nonlinear systems: + + 1. The "standard nonlinear system", i.e. the `NonlinearProblem`. This is a system of + equations with an initial condition where you want to satisfy all equations + simultaneously. + 2. The "interval rootfinding problem", i.e. the `IntervalNonlinearProblem`. This is the + case where you're given an interval `[a,b]` and need to find where `f(u) = 0` for `u` + inside the bounds. + 3. The "steady state problem", i.e. find the `u` such that `u' = f(u) = 0`. While related + to (1), it's not entirely the same because there's a uniquely defined privileged root. + 4. The nonlinear least squares problem, which is an under/over-constrained nonlinear system + which might not be satisfiable, i.e. there may be no `u` such that `f(u) = 0`, and thus + we find the `u` which minimizes `||f(u)||` in the least squares sense. + +One important distinction is that (1) and (3) require the input and output sizes to be the +same, while (4) does not. ## Problem Type 1: Solving Nonlinear Systems of Equations -A nonlinear system $$f(u) = 0$$ is specified by defining a function `f(u,p)`, -where `p` are the parameters of the system. For example, the following solves -the vector equation $$f(u) = u^2 - p$$ for a vector of equations: +A nonlinear system $$f(u) = 0$$ is specified by defining a function `f(u,p)`, where `p` are +the parameters of the system. For example, the following solves the vector +equation $$f(u) = u^2 - p$$ for a vector of equations: ```@example 1 using NonlinearSolve @@ -42,15 +40,15 @@ prob = NonlinearProblem(f, u0, p) sol = solve(prob) ``` -where `u0` is the initial condition for the rootfinder. Native NonlinearSolve.jl -solvers use the given type of `u0` to determine the type used within the solver -and the return. Note that the parameters `p` can be any type, but most are an -AbstractArray for automatic differentiation. +where `u0` is the initial condition for the rootfinder. Native NonlinearSolve.jl solvers use +the given type of `u0` to determine the type used within the solver and the return. Note +that the parameters `p` can be any type, but most are an AbstractArray for automatic +differentiation. ### Investigating the Solution -To investigate the solution, one can look at the elements of the `NonlinearSolution`. -The most important value is `sol.u`: this is the `u` that satisfies `f(u) = 0`. For example: +To investigate the solution, one can look at the elements of the `NonlinearSolution`. The +most important value is `sol.u`: this is the `u` that satisfies `f(u) = 0`. 
For example: ```@example 1 u = sol.u ``` @@ -60,63 +58,70 @@ u = sol.u f(u, p) ``` -This final value, the difference of the solution against zero, can also be found with `sol.resid`: +This final value, the difference of the solution against zero, can also be found with +`sol.resid`: ```@example 1 sol.resid ``` -To know if the solution converged, or why the solution had not converged we can check the return -code (`retcode`): +To know if the solution converged, or why it did not converge, we can check the return +code (`retcode`): ```@example 1 sol.retcode ``` -There are multiple return codes which can mean the solve was successful, and thus we can use the -general command `SciMLBase.successful_retcode` to check whether the solution process exited as -intended: +There are multiple return codes which can mean the solve was successful, and thus we can use +the general command `SciMLBase.successful_retcode` to check whether the solution process +exited as intended: ```@example 1 SciMLBase.successful_retcode(sol) ``` -If we're curious about what it took to solve this equation, then you're in luck because all of the -details can be found in `sol.stats`: +If we're curious about what it took to solve this equation, then we're in luck because all +of the details can be found in `sol.stats`: ```@example 1 sol.stats ``` -For more information on `NonlinearSolution`s, see the [`NonlinearSolution` manual page](@ref solution). +For more information on `NonlinearSolution`s, see the +[`NonlinearSolution` manual page](@ref solution). ### Interacting with the Solver Options -While `sol = solve(prob)` worked for our case here, in many situations you may need to interact more -deeply with how the solving process is done. First things first, you can specify the solver using the -positional arguments. For example, let's set the solver to `TrustRegion()`: +While `sol = solve(prob)` worked for our case here, in many situations you may need to +interact more deeply with how the solving process is done. First things first, you can +specify the solver using the positional arguments. For example, let's set the solver to +`TrustRegion()`: ```@example 1 solve(prob, TrustRegion()) ``` -For a complete list of solver choices, see [the nonlinear system solvers page](@ref nonlinearsystemsolvers). +For a complete list of solver choices, see +[the nonlinear system solvers page](@ref nonlinearsystemsolvers). -Next we can modify the tolerances. Here let's set some really low tolerances to force a tight solution: +Next we can modify the tolerances. Here let's set some really low tolerances to force a +tight solution: ```@example 1 solve(prob, TrustRegion(), reltol = 1e-12, abstol = 1e-12) ``` -There are many more options for doing this configuring. Specifically for handling termination conditions, -see the [Termination Conditions](@ref termination_condition) page for more details. And for more details on -all of the available keyword arguments, see the [solver options](@ref solver_options) page. +There are many more options for configuring the solve. Specifically, for handling +termination conditions, see the [Termination Conditions](@ref termination_condition) page +for more details. And for more details on all of the available keyword arguments, see the +[solver options](@ref solver_options) page. ## Problem Type 2: Solving Interval Rootfinding Problems with Bracketing Methods -For scalar rootfinding problems, bracketing methods exist in NonlinearSolve.
The difference with bracketing -methods is that with bracketing methods, instead of giving a `u0` initial condition, you pass a `uspan (a,b)` -bracket in which the zero is expected to live. For example: +For scalar rootfinding problems, bracketing methods exist in NonlinearSolve. The difference +with bracketing methods is that, instead of giving a `u0` initial condition, you pass a +`uspan = (a, b)` bracket in which the zero is expected to live. For example: ```@example 1 using NonlinearSolve @@ -126,14 +131,67 @@ prob_int = IntervalNonlinearProblem(f, uspan) sol = solve(prob_int) ``` -All of the same option handling from before works just as before, now just with different solver choices -(see the [bracketing solvers](@ref bracketing) page for more details). For example, let's set the solver -to `ITP()` and set a high absolute tolerance: +All of the same option handling works just as before, now with different solver choices +(see the [bracketing solvers](@ref bracketing) page for more details). For example, let's +set the solver to `ITP()` and set a high absolute tolerance: ```@example 1 sol = solve(prob_int, ITP(), abstol = 0.01) ``` +## Problem Type 3: Solving Steady State Problems + +For Steady State Problems, we have a wrapper package +[SteadyStateDiffEq.jl](https://github.com/SciML/SteadyStateDiffEq.jl). This package +automates handling SteadyStateProblems with NonlinearSolve and OrdinaryDiffEq. + +```@example 1 +using NonlinearSolve, SteadyStateDiffEq + +f(u, p, t) = [2 - 2u[1]; u[1] - 4u[2]] +u0 = [0.0, 0.0] +prob = SteadyStateProblem(f, u0) + +solve(prob, SSRootfind()) +``` + +If you don't provide a nonlinear solver to `SSRootfind`, it uses a polyalgorithm from +NonlinearSolve. We can also provide the actual nonlinear solver to use: + +```@example 1 +solve(prob, SSRootfind(Broyden())) +``` + +## Problem Type 4: Solving Nonlinear Least Squares Problems + +```@example 1 +using NonlinearSolve + +function nlls!(du, u, p) + du[1] = 2u[1] - 2 + du[2] = u[1] - 4u[2] + du[3] = 0 +end +``` + +Note that here the output array is of length `3` while the input array is of length `2`. We +need to provide the `resid_prototype` to tell the solver what the output size is (this can +be skipped for out-of-place problems; a short sketch of that form is given at the end of +this tutorial): + +```@example 1 +u0 = [0.0, 0.0] +prob = NonlinearLeastSquaresProblem(NonlinearFunction(nlls!, resid_prototype = zeros(3)), + u0) + +solve(prob) +``` + +Same as before, we can change the solver and tolerances: + +```@example 1 +solve(prob, GaussNewton(), reltol = 1e-12, abstol = 1e-12) +``` + ## Going Beyond the Basics: How to use the Documentation Congrats, you now know how to use the basics of NonlinearSolve.jl! However, there is so much more to @@ -143,6 +201,6 @@ see. Next check out: - [An iterator interface which lets you step through the solving process step by step](@ref iterator) - [How to handle large systems of equations efficiently](@ref large_systems) - [Ways to use NonlinearSolve.jl that are faster to start up and can statically compile](@ref fast_startup) - - [More detailed termination conditions](@ref termination_conditions_tutorial) + - [More detailed termination conditions](@ref termination_condition) And also check out the rest of the manual.
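+For reference, here is the same least squares problem written in out-of-place form, where
+`resid_prototype` can be omitted because the output size is inferred from the returned
+array. This is a minimal sketch; the `nlls_oop` name is purely illustrative:
+
+```@example 1
+nlls_oop(u, p) = [2u[1] - 2, u[1] - 4u[2], 0.0]
+prob_oop = NonlinearLeastSquaresProblem(nlls_oop, [0.0, 0.0])
+solve(prob_oop)
+```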
diff --git a/docs/src/tutorials/large_systems.md b/docs/src/tutorials/large_systems.md index 8e53d193c..5748cb351 100644 --- a/docs/src/tutorials/large_systems.md +++ b/docs/src/tutorials/large_systems.md @@ -1,9 +1,11 @@ # [Efficiently Solving Large Sparse Ill-Conditioned Nonlinear Systems in Julia](@id large_systems) -This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving ill-conditioned nonlinear systems -requires specializing the linear solver on properties of the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` -linear solve and the ``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of -NonlinearSolve.jl by solving the steady state stiff Brusselator partial differential equation (BRUSS) using NonlinearSolve.jl. +This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving +ill-conditioned nonlinear systems requires specializing the linear solver on properties of +the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` linear solve and the +``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of +NonlinearSolve.jl by solving the steady state stiff Brusselator partial differential +equation (BRUSS) using NonlinearSolve.jl. ## Definition of the Brusselator Equation @@ -47,13 +49,13 @@ u(x,y+1,t) &= u(x,y,t) \end{align} ``` -To solve this PDE, we will discretize it into a system of ODEs with the finite -difference method. We discretize `u` and `v` into arrays of the values at each -time point: `u[i,j] = u(i*dx,j*dy)` for some choice of `dx`/`dy`, and same for -`v`. Then our ODE is defined with `U[i,j,k] = [u v]`. The second derivative -operator, the Laplacian, discretizes to become a tridiagonal matrix with -`[1 -2 1]` and a `1` in the top right and bottom left corners. The nonlinear functions -are then applied at each point in space (they are broadcast). Use `dx=dy=1/32`. +To solve this PDE, we will discretize it into a system of ODEs with the finite difference +method. We discretize `u` and `v` into arrays of the values at each time point: +`u[i,j] = u(i*dx,j*dy)` for some choice of `dx`/`dy`, and same for `v`. Then our ODE is +defined with `U[i,j,k] = [u v]`. The second derivative operator, the Laplacian, discretizes +to become a tridiagonal matrix with `[1 -2 1]` and a `1` in the top right and bottom left +corners. The nonlinear functions are then applied at each point in space (they are +broadcast). Use `dx=dy=1/32`. The resulting `NonlinearProblem` definition is: @@ -100,18 +102,17 @@ prob_brusselator_2d = NonlinearProblem(brusselator_2d_loop, u0, p) ## Choosing Jacobian Types -When we are solving this nonlinear problem, the Jacobian must be built at many -iterations, and this can be one of the most -expensive steps. There are two pieces that must be optimized in order to reach -maximal efficiency when solving stiff equations: the sparsity pattern and the -construction of the Jacobian. The construction is filling the matrix -`J` with values, while the sparsity pattern is what `J` to use. +When we are solving this nonlinear problem, the Jacobian must be built at many iterations, +and this can be one of the most expensive steps. There are two pieces that must be optimized +in order to reach maximal efficiency when solving stiff equations: the sparsity pattern and +the construction of the Jacobian. The construction is filling the matrix `J` with values, +while the sparsity pattern determines the structure of `J` to use.
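+To make these two pieces concrete before the details below, here is a hedged sketch of
+declaring a structured Jacobian for an illustrative 1D problem whose residual only couples
+neighboring entries (the `f_tridiag!` name and coefficients are made up for this example;
+the `jac_prototype` mechanism itself is explained in the following paragraphs):
+
+```julia
+using NonlinearSolve, LinearAlgebra
+
+function f_tridiag!(du, u, p)
+    n = length(u)
+    for i in 1:n
+        # each residual entry touches only u[i] and its immediate neighbors,
+        # so the Jacobian is tridiagonal
+        du[i] = 2u[i] - p
+        i > 1 && (du[i] += u[i - 1] / 10)
+        i < n && (du[i] += u[i + 1] / 10)
+    end
+end
+
+# declare the structure via a prototype; the solver fills in the values
+jp = Tridiagonal(zeros(7), zeros(8), zeros(7))
+ff = NonlinearFunction(f_tridiag!; jac_prototype = jp)
+prob = NonlinearProblem(ff, ones(8), 2.0)
+solve(prob, NewtonRaphson())
+```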
-The sparsity pattern is given by a prototype matrix, the `jac_prototype`, which -will be copied to be used as `J`. The default is for `J` to be a `Matrix`, -i.e. a dense matrix. However, if you know the sparsity of your problem, then -you can pass a different matrix type. For example, a `SparseMatrixCSC` will -give a sparse matrix. Other sparse matrix types include: +The sparsity pattern is given by a prototype matrix, the `jac_prototype`, which will be +copied to be used as `J`. The default is for `J` to be a `Matrix`, i.e. a dense matrix. +However, if you know the sparsity of your problem, then you can pass a different matrix +type. For example, a `SparseMatrixCSC` will give a sparse matrix. Other sparse matrix types +include: - Bidiagonal - Tridiagonal @@ -122,16 +123,15 @@ give a sparse matrix. Other sparse matrix types include: ## Declaring a Sparse Jacobian with Automatic Sparsity Detection Jacobian sparsity is declared by the `jac_prototype` argument in the `NonlinearFunction`. -Note that you should only do this if the sparsity is high, for example, 0.1% -of the matrix is non-zeros, otherwise the overhead of sparse matrices can be higher -than the gains from sparse differentiation! +Note that you should only do this if the sparsity is high, for example, only 0.1% of the +matrix entries are non-zero; otherwise the overhead of sparse matrices can be higher than +the gains from sparse differentiation! One of the useful companion tools for NonlinearSolve.jl is -[Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl). -This allows for automatic declaration of Jacobian sparsity types. To see this -in action, we can give an example `du` and `u` and call `jacobian_sparsity` -on our function with the example arguments, and it will kick out a sparse matrix -with our pattern, that we can turn into our `jac_prototype`. +[Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl). This allows for automatic +declaration of Jacobian sparsity types. To see this in action, we can give an example `du` +and `u` and call `jacobian_sparsity` on our function with the example arguments, and it will +kick out a sparse matrix with our pattern, which we can turn into our `jac_prototype`. ```@example ill_conditioned_nlprob using Symbolics @@ -140,12 +140,11 @@ jac_sparsity = Symbolics.jacobian_sparsity((du, u) -> brusselator_2d_loop(du, u, p), du0, u0) ``` -Notice that Julia gives a nice print out of the sparsity pattern. That's neat, and -would be tedious to build by hand! Now we just pass it to the `NonlinearFunction` -like as before: +Notice that Julia gives a nice print out of the sparsity pattern. That's neat, and would be +tedious to build by hand! Now we just pass it to the `NonlinearFunction` just like before: ```@example ill_conditioned_nlprob -ff = NonlinearFunction(brusselator_2d_loop; jac_prototype = float.(jac_sparsity)) +ff = NonlinearFunction(brusselator_2d_loop; sparsity = jac_sparsity) ``` Build the `NonlinearProblem`: @@ -170,37 +169,36 @@ or `NewtonRaphson(linsolve = UMFPACKFactorization())` ## Using Jacobian-Free Newton-Krylov -A completely different way to optimize the linear solvers for large sparse -matrices is to use a Krylov subspace method. This requires choosing a linear -solver for changing to a Krylov method. To swap the linear solver out, we use -the `linsolve` command and choose the GMRES linear solver. +A completely different way to optimize the linear solvers for large sparse matrices is to +use a Krylov subspace method.
This simply requires choosing a Krylov method as the +linear solver. To swap the linear solver out, we use the `linsolve` keyword argument and +choose the GMRES linear solver. ```@example ill_conditioned_nlprob @btime solve(prob_brusselator_2d, NewtonRaphson(linsolve = KrylovJL_GMRES())); nothing # hide ``` -Notice that this acceleration does not require the definition of a sparsity -pattern, and can thus be an easier way to scale for large problems. For more -information on linear solver choices, see the +Notice that this acceleration does not require the definition of a sparsity pattern, and can +thus be an easier way to scale for large problems. For more information on linear solver +choices, see the [linear solver documentation](https://docs.sciml.ai/DiffEqDocs/stable/features/linear_nonlinear/#linear_nonlinear). `linsolve` choices are any valid [LinearSolve.jl](https://linearsolve.sciml.ai/dev/) solver. !!! note - Switching to a Krylov linear solver will automatically change the nonlinear problem solver - into Jacobian-free mode, dramatically reducing the memory required. This can - be overridden by adding `concrete_jac=true` to the algorithm. + Switching to a Krylov linear solver will automatically change the nonlinear problem + solver into Jacobian-free mode, dramatically reducing the memory required. This can be + overridden by adding `concrete_jac=true` to the algorithm. ## Adding a Preconditioner Any [LinearSolve.jl-compatible preconditioner](https://docs.sciml.ai/LinearSolve/stable/basics/Preconditioners/) -can be used as a preconditioner in the linear solver interface. To define -preconditioners, one must define a `precs` function in compatible with nonlinear -solvers which returns the left and right preconditioners, matrices which -approximate the inverse of `W = I - gamma*J` used in the solution of the ODE. -An example of this with using [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) -is as follows: +can be used as a preconditioner in the linear solver interface. To define preconditioners, +one must define a `precs` function compatible with nonlinear solvers which returns the +left and right preconditioners, matrices which approximate the inverse of `W = I - gamma*J` +used in the solution of the ODE. An example of this using +[IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) is as follows: ```@example ill_conditioned_nlprob using IncompleteLU @@ -218,22 +216,21 @@ end nothing # hide ``` -Notice a few things about this preconditioner. This preconditioner uses the -sparse Jacobian, and thus we set `concrete_jac=true` to tell the algorithm to -generate the Jacobian (otherwise, a Jacobian-free algorithm is used with GMRES -by default). Then `newW = true` whenever a new `W` matrix is computed, and -`newW=nothing` during the startup phase of the solver. Thus, we do a check -`newW === nothing || newW` and when true, it's only at these points when -we update the preconditioner, otherwise we just pass on the previous version. -We use `convert(AbstractMatrix,W)` to get the concrete `W` matrix (matching -`jac_prototype`, thus `SpraseMatrixCSC`) which we can use in the preconditioner's -definition. Then we use `IncompleteLU.ilu` on that sparse matrix to generate -the preconditioner. We return `Pl,nothing` to say that our preconditioner is a -left preconditioner, and that there is no right preconditioning. - -This method thus uses both the Krylov solver and the sparse Jacobian. Not only -that, it is faster than both implementations!
IncompleteLU is fussy in that it -requires a well-tuned `τ` parameter. Another option is to use +Notice a few things about this preconditioner. This preconditioner uses the sparse Jacobian, +and thus we set `concrete_jac = true` to tell the algorithm to generate the Jacobian +(otherwise, a Jacobian-free algorithm is used with GMRES by default). Then `newW = true` +whenever a new `W` matrix is computed, and `newW = nothing` during the startup phase of the +solver. Thus, we do a check `newW === nothing || newW`, and only when this is true do we +update the preconditioner; otherwise we just pass on the previous version. +We use `convert(AbstractMatrix,W)` to get the concrete `W` matrix (matching `jac_prototype`, +thus `SparseMatrixCSC`) which we can use in the preconditioner's definition. Then we use +`IncompleteLU.ilu` on that sparse matrix to generate the preconditioner. We return +`Pl, nothing` to say that our preconditioner is a left preconditioner, and that there is no +right preconditioning. + +This method thus uses both the Krylov solver and the sparse Jacobian. Not only that, it is +faster than both implementations! IncompleteLU is fussy in that it requires a well-tuned `τ` +parameter. Another option is to use [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) which is more automatic. The setup is very similar to before: diff --git a/docs/src/tutorials/modelingtoolkit.md b/docs/src/tutorials/modelingtoolkit.md index 3ee7d8e75..d6d9711c3 100644 --- a/docs/src/tutorials/modelingtoolkit.md +++ b/docs/src/tutorials/modelingtoolkit.md @@ -1,9 +1,9 @@ # [Symbolic Nonlinear System Definition and Acceleration via ModelingToolkit](@id modelingtoolkit) -[ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/dev/) is a symbolic-numeric modeling system -for the Julia SciML ecosystem. It adds a high-level interactive interface for the numerical solvers -which can make it easy to symbolically modify and generate equations to be solved. The basic form of -using ModelingToolkit looks as follows: +[ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/dev/) is a symbolic-numeric +modeling system for the Julia SciML ecosystem. It adds a high-level interactive interface +for the numerical solvers which can make it easy to symbolically modify and generate +equations to be solved. The basic form of using ModelingToolkit looks as follows: ```@example mtk using ModelingToolkit, NonlinearSolve @@ -31,8 +31,8 @@ sol = solve(prob, NewtonRaphson()) ## Symbolic Derivations of Extra Functions -As a symbolic system, ModelingToolkit can be used to represent the equations and derive new forms. For example, -let's look at the equations: +As a symbolic system, ModelingToolkit can be used to represent the equations and derive new +forms.
For example, let's look at the equations: ```@example mtk equations(ns) ``` @@ -44,8 +44,8 @@ We can ask it what the Jacobian of our system is via `calculate_jacobian`: calculate_jacobian(ns) ``` -We can tell MTK to generate a computable form of this analytical Jacobian via `jac = true` to help the solver -use efficient forms: +We can tell MTK to generate a computable form of this analytical Jacobian via `jac = true` +to help the solver use efficient forms: ```@example mtk prob = NonlinearProblem(ns, u0, ps, jac = true) @@ -54,9 +54,9 @@ sol = solve(prob, NewtonRaphson()) ## Symbolic Simplification of Nonlinear Systems via Tearing -One of the major reasons for using ModelingToolkit is to allow structural simplification of the systems. It's very -easy to write down a mathematical model that, in theory, could be solved more simply. Let's take a look at a quick -system: +One of the major reasons for using ModelingToolkit is to allow structural simplification of +the systems. It's very easy to write down a mathematical model that, in theory, could be +solved more simply. Let's take a look at a quick system: ```@example mtk @variables u1 u2 u3 u4 u5 @@ -118,6 +118,7 @@ sol[u5] ## Component-Based and Acausal Modeling -If you're interested in building models in a component or block based form, such as seen in systems like Simulink or Modelica, -take a deeper look at [ModelingToolkit.jl's documentation](https://docs.sciml.ai/ModelingToolkit/stable/) which goes into -detail on such features. +If you're interested in building models in a component or block based form, such as seen in +systems like Simulink or Modelica, take a deeper look at +[ModelingToolkit.jl's documentation](https://docs.sciml.ai/ModelingToolkit/stable/) which +goes into detail on such features. diff --git a/docs/src/tutorials/small_compile.md b/docs/src/tutorials/small_compile.md index c5a40ee05..ec28361f4 100644 --- a/docs/src/tutorials/small_compile.md +++ b/docs/src/tutorials/small_compile.md @@ -1,11 +1,11 @@ # [Faster Startup and Static Compilation](@id fast_startup) -In many instances one may want a very lightweight version of NonlinearSolve.jl. For this case there -exists the solver package SimpleNonlinearSolve.jl. SimpleNonlinearSolve.jl solvers all satisfy the -same interface as NonlinearSolve.jl, but they are designed to be simpler, lightweight, and thus -have a faster startup time. Everything that can be done with NonlinearSolve.jl can be done with -SimpleNonlinearSolve.jl. Thus for example, we can solve the core tutorial problem with just SimpleNonlinearSolve.jl -as follows: +In many instances one may want a very lightweight version of NonlinearSolve.jl. For this +case there exists the solver package SimpleNonlinearSolve.jl. SimpleNonlinearSolve.jl +solvers all satisfy the same interface as NonlinearSolve.jl, but they are designed to be +simpler, lightweight, and thus have a faster startup time. Everything that can be done with +NonlinearSolve.jl can be done with SimpleNonlinearSolve.jl. Thus, for example, we can solve +the core tutorial problem with just SimpleNonlinearSolve.jl as follows: ```@example simple using SimpleNonlinearSolve @@ -17,23 +17,24 @@ prob = NonlinearProblem(f, u0, p) sol = solve(prob, SimpleNewtonRaphson()) ``` -However, there are a few downsides to SimpleNonlinearSolve's `SimpleX` style algorithms to note: +However, there are a few downsides to SimpleNonlinearSolve's `SimpleX` style algorithms to +note: - 1.
SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and thus do not have - the ability to specify linear solvers, use sparse matrices, preconditioners, and all of the other features - which are required to scale for very large systems of equations. - 2. SimpleNonlinearSolve.jl's methods have less robust error handling and termination conditions, and thus - these methods are missing some flexibility and give worse hints for debugging. - 3. SimpleNonlinearSolve.jl's methods are focused on out-of-place support. There is some in-place support, - but it's designed for simple smaller systems and so some optimizations are missing. + 1. SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and + thus do not have the ability to specify linear solvers, use sparse matrices, + preconditioners, and all of the other features which are required to scale for very + large systems of equations. + 2. SimpleNonlinearSolve.jl's methods have less robust error handling and termination + conditions, and thus these methods are missing some flexibility and give worse hints + for debugging. Note that the more robust termination conditions can be enabled, but + they are disabled by default. However, the major upsides of SimpleNonlinearSolve.jl are: 1. The methods are optimized and non-allocating on StaticArrays 2. The methods are minimal in compilation -As such, you can use the code as shown above to have very low startup with good methods, but for more scaling and debuggability -we recommend the full NonlinearSolve.jl. But that said, +As such, you can use the code as shown above to have very low startup with good methods, but +for more scaling and debuggability we recommend the full NonlinearSolve.jl. But that said, ```@example simple using StaticArrays @@ -44,11 +45,13 @@ prob = NonlinearProblem(f, u0, p) sol = solve(prob, SimpleNewtonRaphson()) ``` -using StaticArrays.jl is also the fastest form for small equations, so if you know your system is small then SimpleNonlinearSolve.jl -is not only sufficient but optimal. +using StaticArrays.jl is also the fastest form for small equations, so if you know your +system is small then SimpleNonlinearSolve.jl is not only sufficient but optimal. ## Static Compilation -Julia has tools for building small binaries via static compilation with [StaticCompiler.jl](https://github.com/tshort/StaticCompiler.jl). -However, these tools are currently limited to type-stable non-allocating functions. That said, SimpleNonlinearSolve.jl's solvers are -precisely the subset of NonlinearSolve.jl which are compatible with static compilation. +Julia has tools for building small binaries via static compilation with +[StaticCompiler.jl](https://github.com/tshort/StaticCompiler.jl). +However, these tools are currently limited to type-stable non-allocating functions. That +said, SimpleNonlinearSolve.jl's solvers are precisely the subset of NonlinearSolve.jl which +are compatible with static compilation.
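+As a quick sanity check of the non-allocating claim above, one can warm up the solve and
+then measure allocations; a small sketch (exact numbers are machine- and version-dependent,
+and zero allocations is the expectation rather than a guarantee):
+
+```julia
+using SimpleNonlinearSolve, StaticArrays
+
+f(u, p) = u .* u .- p
+prob = NonlinearProblem{false}(f, SA[1.0, 1.0], 2.0)
+
+solve(prob, SimpleNewtonRaphson())  # warm up / compile first
+@allocated solve(prob, SimpleNewtonRaphson())  # expected to report 0 for static arrays
+```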
diff --git a/ext/NonlinearSolveFastLevenbergMarquardtExt.jl b/ext/NonlinearSolveFastLevenbergMarquardtExt.jl index e292e29c6..6d33c23ba 100644 --- a/ext/NonlinearSolveFastLevenbergMarquardtExt.jl +++ b/ext/NonlinearSolveFastLevenbergMarquardtExt.jl @@ -3,11 +3,12 @@ module NonlinearSolveFastLevenbergMarquardtExt using ArrayInterface, NonlinearSolve, SciMLBase import ConcreteStructs: @concrete import FastLevenbergMarquardt as FastLM +import FiniteDiff, ForwardDiff function _fast_lm_solver(::FastLevenbergMarquardtJL{linsolve}, x) where {linsolve} - if linsolve == :cholesky + if linsolve === :cholesky return FastLM.CholeskySolver(ArrayInterface.undefmatrix(x)) - elseif linsolve == :qr + elseif linsolve === :qr return FastLM.QRSolver(eltype(x), length(x)) else throw(ArgumentError("Unknown FastLevenbergMarquardt Linear Solver: $linsolve")) @@ -33,23 +34,65 @@ end function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, alg::FastLevenbergMarquardtJL, args...; alias_u0 = false, abstol = 1e-8, - reltol = 1e-8, verbose = false, maxiters = 1000, kwargs...) + reltol = 1e-8, maxiters = 1000, kwargs...) iip = SciMLBase.isinplace(prob) - u0 = alias_u0 ? prob.u0 : deepcopy(prob.u0) - - @assert prob.f.jac!==nothing "FastLevenbergMarquardt requires a Jacobian!" + u = NonlinearSolve.__maybe_unaliased(prob.u0, alias_u0) + fu = NonlinearSolve.evaluate_f(prob, u) f! = InplaceFunction{iip}(prob.f) - J! = InplaceFunction{iip}(prob.f.jac) - resid_prototype = prob.f.resid_prototype === nothing ? - (!iip ? prob.f(u0, prob.p) : zeros(u0)) : - prob.f.resid_prototype + if prob.f.jac === nothing + use_forward_diff = if alg.autodiff === nothing + ForwardDiff.can_dual(eltype(u)) + else + alg.autodiff isa AutoForwardDiff + end + uf = SciMLBase.JacobianWrapper{iip}(prob.f, prob.p) + if use_forward_diff + cache = iip ? ForwardDiff.JacobianConfig(uf, fu, u) : + ForwardDiff.JacobianConfig(uf, u) + else + cache = FiniteDiff.JacobianCache(u, fu) + end + J! = if iip + if use_forward_diff + fu_cache = similar(fu) + function (J, x, p) + uf.p = p + ForwardDiff.jacobian!(J, uf, fu_cache, x, cache) + return J + end + else + function (J, x, p) + uf.p = p + FiniteDiff.finite_difference_jacobian!(J, uf, x, cache) + return J + end + end + else + if use_forward_diff + function (J, x, p) + uf.p = p + ForwardDiff.jacobian!(J, uf, x, cache) + return J + end + else + function (J, x, p) + uf.p = p + J_ = FiniteDiff.finite_difference_jacobian(uf, x, cache) + copyto!(J, J_) + return J + end + end + end + else + J! = InplaceFunction{iip}(prob.f.jac) + end - J = similar(u0, length(resid_prototype), length(u0)) + J = similar(u, length(fu), length(u)) - solver = _fast_lm_solver(alg, u0) - LM = FastLM.LMWorkspace(u0, resid_prototype, J) + solver = _fast_lm_solver(alg, u) + LM = FastLM.LMWorkspace(u, fu, J) return FastLevenbergMarquardtJLCache(f!, J!, prob, alg, LM, solver, (; xtol = abstol, ftol = reltol, maxit = maxiters, alg.factor, alg.factoraccept, diff --git a/ext/NonlinearSolveLeastSquaresOptimExt.jl b/ext/NonlinearSolveLeastSquaresOptimExt.jl index 63ce7d0cc..004c26670 100644 --- a/ext/NonlinearSolveLeastSquaresOptimExt.jl +++ b/ext/NonlinearSolveLeastSquaresOptimExt.jl @@ -5,12 +5,12 @@ import ConcreteStructs: @concrete import LeastSquaresOptim as LSO function _lso_solver(::LeastSquaresOptimJL{alg, linsolve}) where {alg, linsolve} - ls = linsolve == :qr ? LSO.QR() : - (linsolve == :cholesky ? LSO.Cholesky() : - (linsolve == :lsmr ? LSO.LSMR() : nothing)) - if alg == :lm + ls = linsolve === :qr ? LSO.QR() : + (linsolve === :cholesky ? 
LSO.Cholesky() : + (linsolve === :lsmr ? LSO.LSMR() : nothing)) + if alg === :lm return LSO.LevenbergMarquardt(ls) - elseif alg == :dogleg + elseif alg === :dogleg return LSO.Dogleg(ls) else throw(ArgumentError("Unknown LeastSquaresOptim Algorithm: $alg")) diff --git a/ext/NonlinearSolveMINPACKExt.jl b/ext/NonlinearSolveMINPACKExt.jl new file mode 100644 index 000000000..5f15d85b3 --- /dev/null +++ b/ext/NonlinearSolveMINPACKExt.jl @@ -0,0 +1,72 @@ +module NonlinearSolveMINPACKExt + +using NonlinearSolve, SciMLBase +using MINPACK + +function SciMLBase.__solve(prob::Union{NonlinearProblem{uType, iip}, + NonlinearLeastSquaresProblem{uType, iip}}, alg::CMINPACK, args...; + abstol = 1e-6, maxiters = 100000, alias_u0::Bool = false, + kwargs...) where {uType, iip} + if prob.u0 isa Number + u0 = [prob.u0] + else + u0 = NonlinearSolve.__maybe_unaliased(prob.u0, alias_u0) + end + + sizeu = size(prob.u0) + p = prob.p + + # unwrapping alg params + show_trace = alg.show_trace + tracing = alg.tracing + + if !iip && prob.u0 isa Number + f! = (du, u) -> (du .= prob.f(first(u), p); Cint(0)) + elseif !iip && prob.u0 isa Vector{Float64} + f! = (du, u) -> (du .= prob.f(u, p); Cint(0)) + elseif !iip && prob.u0 isa AbstractArray + f! = (du, u) -> (du .= vec(prob.f(reshape(u, sizeu), p)); Cint(0)) + elseif prob.u0 isa Vector{Float64} + f! = (du, u) -> prob.f(du, u, p) + else # Then it's an in-place function on an abstract array + f! = (du, u) -> (prob.f(reshape(du, sizeu), reshape(u, sizeu), p); du = vec(du); 0) + end + + u = zero(u0) + resid = NonlinearSolve.evaluate_f(prob, u) + m = length(resid) + size_jac = (length(resid), length(u)) + + method = ifelse(alg.method === :auto, + ifelse(prob isa NonlinearLeastSquaresProblem, :lm, :hybr), alg.method) + + if SciMLBase.has_jac(prob.f) + if !iip && prob.u0 isa Number + g! = (du, u) -> (du .= prob.f.jac(first(u), p); Cint(0)) + elseif !iip && prob.u0 isa Vector{Float64} + g! = (du, u) -> (du .= prob.f.jac(u, p); Cint(0)) + elseif !iip && prob.u0 isa AbstractArray + g! = (du, u) -> (du .= vec(prob.f.jac(reshape(u, sizeu), p)); Cint(0)) + elseif prob.u0 isa Vector{Float64} + g! = (du, u) -> prob.f.jac(du, u, p) + else # Then it's an in-place function on an abstract array + g! = function (du, u) + prob.f.jac(reshape(du, size_jac), reshape(u, sizeu), p) + return Cint(0) + end + end + original = MINPACK.fsolve(f!, g!, u0, m; tol = abstol, show_trace, tracing, method, + iterations = maxiters, kwargs...) + else + original = MINPACK.fsolve(f!, u0, m; tol = abstol, show_trace, tracing, method, + iterations = maxiters, kwargs...) + end + + u = reshape(original.x, size(u)) + resid = original.f + retcode = original.converged ? ReturnCode.Success : ReturnCode.Failure + + return SciMLBase.build_solution(prob, alg, u, resid; retcode, original) +end + +end diff --git a/ext/NonlinearSolveNLsolveExt.jl b/ext/NonlinearSolveNLsolveExt.jl new file mode 100644 index 000000000..5350e9be2 --- /dev/null +++ b/ext/NonlinearSolveNLsolveExt.jl @@ -0,0 +1,80 @@ +module NonlinearSolveNLsolveExt + +using NonlinearSolve, NLsolve, DiffEqBase, SciMLBase +import UnPack: @unpack + +function SciMLBase.__solve(prob::NonlinearProblem, alg::NLsolveJL, args...; abstol = 1e-6, + maxiters = 1000, alias_u0::Bool = false, kwargs...) 
+ if typeof(prob.u0) <: Number + u0 = [prob.u0] + else + u0 = NonlinearSolve.__maybe_unaliased(prob.u0, alias_u0) + end + + iip = isinplace(prob) + + sizeu = size(prob.u0) + p = prob.p + + # unwrapping alg params + @unpack method, autodiff, store_trace, extended_trace, linesearch, linsolve = alg + @unpack factor, autoscale, m, beta, show_trace = alg + + if !iip && prob.u0 isa Number + f! = (du, u) -> (du .= prob.f(first(u), p); Cint(0)) + elseif !iip && prob.u0 isa Vector{Float64} + f! = (du, u) -> (du .= prob.f(u, p); Cint(0)) + elseif !iip && prob.u0 isa AbstractArray + f! = (du, u) -> (du .= vec(prob.f(reshape(u, sizeu), p)); Cint(0)) + elseif prob.u0 isa Vector{Float64} + f! = (du, u) -> prob.f(du, u, p) + else # Then it's an in-place function on an abstract array + f! = (du, u) -> (prob.f(reshape(du, sizeu), reshape(u, sizeu), p); du = vec(du); 0) + end + + if prob.u0 isa Number + resid = [NonlinearSolve.evaluate_f(prob, first(u0))] + else + resid = NonlinearSolve.evaluate_f(prob, u0) + end + + size_jac = (length(resid), length(u0)) + + if SciMLBase.has_jac(prob.f) + if !iip && prob.u0 isa Number + g! = (du, u) -> (du .= prob.f.jac(first(u), p); Cint(0)) + elseif !iip && prob.u0 isa Vector{Float64} + g! = (du, u) -> (du .= prob.f.jac(u, p); Cint(0)) + elseif !iip && prob.u0 isa AbstractArray + g! = (du, u) -> (du .= vec(prob.f.jac(reshape(u, sizeu), p)); Cint(0)) + elseif prob.u0 isa Vector{Float64} + g! = (du, u) -> prob.f.jac(du, u, p) + else # Then it's an in-place function on an abstract array + g! = function (du, u) + prob.f.jac(reshape(du, size_jac), reshape(u, sizeu), p) + return Cint(0) + end + end + if prob.f.jac_prototype !== nothing + J = zero(prob.f.jac_prototype) + df = OnceDifferentiable(f!, g!, u0, resid, J) + else + df = OnceDifferentiable(f!, g!, u0, resid) + end + else + df = OnceDifferentiable(f!, u0, resid; autodiff) + end + + original = nlsolve(df, u0; ftol = abstol, iterations = maxiters, method, store_trace, + extended_trace, linesearch, linsolve, factor, autoscale, m, beta, show_trace) + + u = reshape(original.zero, size(u0)) + f!(resid, u) + retcode = original.x_converged || original.f_converged ? 
ReturnCode.Success : + ReturnCode.Failure + stats = SciMLBase.NLStats(original.f_calls, original.g_calls, original.g_calls, + original.g_calls, original.iterations) + return SciMLBase.build_solution(prob, alg, u, resid; retcode, original, stats) +end + +end diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 3cdfa6c43..dd0a6cc33 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -195,7 +195,7 @@ include("default.jl") end nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - GeneralBroyden(), GeneralKlement(), DFSane(), nothing) + Broyden(), Klement(), DFSane(), nothing) probs_nlls = NonlinearLeastSquaresProblem[] nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), @@ -235,8 +235,8 @@ end export RadiusUpdateSchemes export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient, - GeneralBroyden, GeneralKlement, LimitedMemoryBroyden -export LeastSquaresOptimJL, FastLevenbergMarquardtJL + Broyden, Klement, LimitedMemoryBroyden +export LeastSquaresOptimJL, FastLevenbergMarquardtJL, CMINPACK, NLsolveJL export NonlinearSolvePolyAlgorithm, RobustMultiNewton, FastShortcutNonlinearPolyalg, FastShortcutNLLSPolyalg diff --git a/src/broyden.jl b/src/broyden.jl index 22b0fed2f..7c90d6f92 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -1,6 +1,6 @@ # Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl """ - GeneralBroyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing, + Broyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing, init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = nothing) An implementation of `Broyden` with resetting and line search. @@ -14,7 +14,7 @@ An implementation of `Broyden` with resetting and line search. - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), which means that no line search is performed. Algorithms from `LineSearches.jl` can be used here directly, and they will be converted to the correct `LineSearch`. It is - recommended to use [LiFukushimaLineSearch](@ref) -- a derivative free linesearch + recommended to use [`LiFukushimaLineSearch`](@ref) -- a derivative free linesearch specifically designed for Broyden's method. - `alpha`: If `init_jacobian` is set to `Val(:identity)`, then the initial Jacobian inverse is set to be `(αI)⁻¹`. Defaults to `nothing` which implies @@ -37,7 +37,7 @@ An implementation of `Broyden` with resetting and line search. useful for specific problems, but whether it will work may depend strongly on the problem. """ -@concrete struct GeneralBroyden{IJ, UR, CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} +@concrete struct Broyden{IJ, UR, CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} ad::AD max_resets::Int reset_tolerance @@ -45,7 +45,7 @@ An implementation of `Broyden` with resetting and line search. 
alpha end -function __alg_print_modifiers(alg::GeneralBroyden{IJ, UR}) where {IJ, UR} +function __alg_print_modifiers(alg::Broyden{IJ, UR}) where {IJ, UR} modifiers = String[] IJ !== :identity && push!(modifiers, "init_jacobian = Val(:$(IJ))") UR !== :good_broyden && push!(modifiers, "update_rule = Val(:$(UR))") @@ -53,12 +53,12 @@ function __alg_print_modifiers(alg::GeneralBroyden{IJ, UR}) where {IJ, UR} return modifiers end -function set_ad(alg::GeneralBroyden{IJ, UR, CJ}, ad) where {IJ, UR, CJ} - return GeneralBroyden{IJ, UR, CJ}(ad, alg.max_resets, alg.reset_tolerance, +function set_ad(alg::Broyden{IJ, UR, CJ}, ad) where {IJ, UR, CJ} + return Broyden{IJ, UR, CJ}(ad, alg.max_resets, alg.reset_tolerance, alg.linesearch, alg.alpha) end -function GeneralBroyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing, +function Broyden(; max_resets = 100, linesearch = nothing, reset_tolerance = nothing, init_jacobian::Val = Val(:identity), autodiff = nothing, alpha = nothing, update_rule = Val(:good_broyden)) UR = _unwrap_val(update_rule) @@ -67,11 +67,11 @@ function GeneralBroyden(; max_resets = 100, linesearch = nothing, reset_toleranc @assert IJ ∈ (:identity, :true_jacobian) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) CJ = IJ === :true_jacobian - return GeneralBroyden{IJ, UR, CJ}(autodiff, max_resets, reset_tolerance, linesearch, + return Broyden{IJ, UR, CJ}(autodiff, max_resets, reset_tolerance, linesearch, alpha) end -@concrete mutable struct GeneralBroydenCache{iip, IJ, UR} <: +@concrete mutable struct BroydenCache{iip, IJ, UR} <: AbstractNonlinearSolveCache{iip} f alg @@ -106,7 +106,7 @@ end trace end -function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralBroyden{IJ, UR}, +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::Broyden{IJ, UR}, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, internalnorm::F = DEFAULT_NORM, kwargs...) where {uType, iip, F, IJ, UR} @@ -153,14 +153,14 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralBroyd trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J⁻¹), du; uses_jac_inverse = Val(true), kwargs...) - return GeneralBroydenCache{iip, IJ, UR}(f, alg, u, u_cache, du, fu, fu_cache, dfu, p, + return BroydenCache{iip, IJ, UR}(f, alg, u, u_cache, du, fu, fu_cache, dfu, p, uf, J⁻¹, J⁻¹_cache, J⁻¹dfu, inv_alpha, alg.alpha, false, 0, alg.max_resets, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, reset_tolerance, reset_check, jac_cache, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) end -function perform_step!(cache::GeneralBroydenCache{iip, IJ, UR}) where {iip, IJ, UR} +function perform_step!(cache::BroydenCache{iip, IJ, UR}) where {iip, IJ, UR} T = eltype(cache.u) if IJ === :true_jacobian && cache.stats.nsteps == 0 @@ -240,7 +240,7 @@ function perform_step!(cache::GeneralBroydenCache{iip, IJ, UR}) where {iip, IJ, return nothing end -function __reinit_internal!(cache::GeneralBroydenCache; kwargs...) +function __reinit_internal!(cache::BroydenCache; kwargs...) 
cache.inv_alpha = __initial_inv_alpha(cache.inv_alpha, cache.alpha_initial, cache.u, cache.fu, cache.internalnorm) cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹, cache.inv_alpha) diff --git a/src/default.jl b/src/default.jl index 459cf4083..a4b6325ca 100644 --- a/src/default.jl +++ b/src/default.jl @@ -19,7 +19,7 @@ residual is returned. ```julia using NonlinearSolve -alg = NonlinearSolvePolyAlgorithm((NewtonRaphson(), GeneralBroyden())) +alg = NonlinearSolvePolyAlgorithm((NewtonRaphson(), Broyden())) ``` """ struct NonlinearSolvePolyAlgorithm{pType, N, A} <: AbstractNonlinearSolveAlgorithm @@ -166,7 +166,7 @@ end """ RobustMultiNewton(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, - adkwargs...) + autodiff = nothing) A polyalgorithm focused on robustness. It uses a mixture of Newton methods with different globalizing techniques (trust region updates, line searches, etc.) in order to find a @@ -180,7 +180,7 @@ or more precision / more stable linear solver choice is required). - `autodiff`: determines the backend used for the Jacobian. Note that this argument is ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to - `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. + `nothing`. - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, then the Jacobian will not be constructed and instead direct Jacobian-vector products `J*v` are computed using forward-mode automatic differentiation or finite differencing @@ -197,22 +197,23 @@ or more precision / more stable linear solver choice is required). [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). """ function RobustMultiNewton(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, adkwargs...) - algs = (TrustRegion(; concrete_jac, linsolve, precs, adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...), + precs = DEFAULT_PRECS, autodiff = nothing) + algs = (TrustRegion(; concrete_jac, linsolve, precs), + TrustRegion(; concrete_jac, linsolve, precs, autodiff, + radius_update_scheme = RadiusUpdateSchemes.Bastin), NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), + autodiff), TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.NLsolve, adkwargs...), + radius_update_scheme = RadiusUpdateSchemes.NLsolve, autodiff), TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Fan, adkwargs...)) + radius_update_scheme = RadiusUpdateSchemes.Fan, autodiff)) return NonlinearSolvePolyAlgorithm(algs, Val(:NLS)) end """ FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, must_use_jacobian::Val = Val(false), adkwargs...) + precs = DEFAULT_PRECS, must_use_jacobian::Val = Val(false), + prefer_simplenonlinearsolve::Val{SA} = Val(false), autodiff = nothing) A polyalgorithm focused on balancing speed and robustness. It first tries less robust methods for more performance and then tries more robust techniques if the faster ones fail. @@ -221,7 +222,7 @@ for more performance and then tries more robust techniques if the faster ones fa - `autodiff`: determines the backend used for the Jacobian. Note that this argument is ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to - `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. + `nothing`. 
- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, then the Jacobian will not be constructed and instead direct Jacobian-vector products `J*v` are computed using forward-mode automatic differentiation or finite differencing @@ -239,31 +240,47 @@ for more performance and then tries more robust techniques if the faster ones fa """ function FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, must_use_jacobian::Val{JAC} = Val(false), - adkwargs...) where {JAC} + prefer_simplenonlinearsolve::Val{SA} = Val(false), + autodiff = nothing) where {JAC, SA} if JAC - algs = (NewtonRaphson(; concrete_jac, linsolve, precs, adkwargs...), + algs = (NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, adkwargs...), + autodiff), + TrustRegion(; concrete_jac, linsolve, precs, autodiff), TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...)) + radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) else - algs = (GeneralBroyden(), - GeneralBroyden(; init_jacobian = Val(:true_jacobian)), - GeneralKlement(; linsolve, precs), - NewtonRaphson(; concrete_jac, linsolve, precs, adkwargs...), - NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...)) + # SimpleNewtonRaphson and SimpleTrustRegion are not robust to singular Jacobians + # and thus are not included in the polyalgorithm + if SA + algs = (SimpleBroyden(), + Broyden(; init_jacobian = Val(:true_jacobian)), + SimpleKlement(), + NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), + NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), + autodiff), + TrustRegion(; concrete_jac, linsolve, precs, autodiff), + TrustRegion(; concrete_jac, linsolve, precs, + radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) + else + algs = (Broyden(), + Broyden(; init_jacobian = Val(:true_jacobian)), + Klement(; linsolve, precs), + NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), + NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), + autodiff), + TrustRegion(; concrete_jac, linsolve, precs, autodiff), + TrustRegion(; concrete_jac, linsolve, precs, + radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) + end end return NonlinearSolvePolyAlgorithm(algs, Val(:NLS)) end """ FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, adkwargs...) + precs = DEFAULT_PRECS, kwargs...) A polyalgorithm focused on balancing speed and robustness. It first tries less robust methods for more performance and then tries more robust techniques if the faster ones fail. @@ -289,11 +306,11 @@ for more performance and then tries more robust techniques if the faster ones fa [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). """ function FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, adkwargs...) + precs = DEFAULT_PRECS, kwargs...)
+ algs = (GaussNewton(; concrete_jac, linsolve, precs, kwargs...), GaussNewton(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), - LevenbergMarquardt(; concrete_jac, linsolve, precs, adkwargs...)) + kwargs...), + LevenbergMarquardt(; concrete_jac, linsolve, precs, kwargs...)) return NonlinearSolvePolyAlgorithm(algs, Val(:NLLS)) end @@ -313,8 +330,10 @@ end function SciMLBase.__solve(prob::NonlinearProblem, ::Nothing, args...; kwargs...) must_use_jacobian = Val(prob.f.jac !== nothing) - return SciMLBase.__solve(prob, FastShortcutNonlinearPolyalg(; must_use_jacobian), - args...; kwargs...) + prefer_simplenonlinearsolve = Val(prob.u0 isa SArray) + return SciMLBase.__solve(prob, + FastShortcutNonlinearPolyalg(; must_use_jacobian, + prefer_simplenonlinearsolve), args...; kwargs...) end function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, ::Nothing, args...; kwargs...) diff --git a/src/extension_algs.jl b/src/extension_algs.jl index 8f9ed4400..b06414f0f 100644 --- a/src/extension_algs.jl +++ b/src/extension_algs.jl @@ -36,7 +36,7 @@ function LeastSquaresOptimJL(alg = :lm; linsolve = nothing, autodiff::Symbol = : end """ - FastLevenbergMarquardtJL(linsolve = :cholesky) + FastLevenbergMarquardtJL(linsolve = :cholesky; autodiff = nothing) Wrapper over [FastLevenbergMarquardt.jl](https://github.com/kamesy/FastLevenbergMarquardt.jl) for solving `NonlinearLeastSquaresProblem`. @@ -46,19 +46,20 @@ for solving `NonlinearLeastSquaresProblem`. This is not really the fastest solver. It is called that since the original package is called "Fast". `LevenbergMarquardt()` is almost always a better choice. -!!! warning - - This algorithm requires the jacobian function to be provided! - ## Arguments: - `linsolve`: Linear solver to use. Can be `:qr` or `:cholesky`. + - `autodiff`: determines the backend used for the Jacobian. Note that this argument is + ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to + `nothing` which means that a default is selected according to the problem specification! + Valid choices are `nothing`, `AutoForwardDiff` or `AutoFiniteDiff`. !!! note This algorithm is only available if `FastLevenbergMarquardt.jl` is installed. """ @concrete struct FastLevenbergMarquardtJL{linsolve} <: AbstractNonlinearSolveAlgorithm + autodiff factor factoraccept factorreject @@ -71,14 +72,137 @@ end function FastLevenbergMarquardtJL(linsolve::Symbol = :cholesky; factor = 1e-6, factoraccept = 13.0, factorreject = 3.0, factorupdate = :marquardt, - minscale = 1e-12, maxscale = 1e16, minfactor = 1e-28, maxfactor = 1e32) + minscale = 1e-12, maxscale = 1e16, minfactor = 1e-28, maxfactor = 1e32, + autodiff = nothing) @assert linsolve in (:qr, :cholesky) @assert factorupdate in (:marquardt, :nielson) + @assert autodiff === nothing || autodiff isa AutoFiniteDiff || + autodiff isa AutoForwardDiff if Base.get_extension(@__MODULE__, :NonlinearSolveFastLevenbergMarquardtExt) === nothing - error("LeastSquaresOptimJL requires FastLevenbergMarquardt.jl to be loaded") + error("FastLevenbergMarquardtJL requires FastLevenbergMarquardt.jl to be loaded") end - return FastLevenbergMarquardtJL{linsolve}(factor, factoraccept, factorreject, + return FastLevenbergMarquardtJL{linsolve}(autodiff, factor, factoraccept, factorreject, factorupdate, minscale, maxscale, minfactor, maxfactor) end + +""" + CMINPACK(; show_trace::Bool=false, tracing::Bool=false, method::Symbol=:auto) + +### Keyword Arguments + + - `show_trace`: whether to show the trace. 
+ - `tracing`: who the hell knows what this does. If you find out, please open an issue/PR. + - `method`: the choice of method for the solver. + +### Method Choices + +The keyword argument `method` can take on different values depending on which method of +`fsolve` you are calling. The standard choices of `method` are: + + - `:hybr`: Modified version of Powell's algorithm. Uses MINPACK routine + [`hybrd1`](https://github.com/devernay/cminpack/blob/d1f5f5a273862ca1bbcf58394e4ac060d9e22c76/hybrd1.c) + - `:lm`: Levenberg-Marquardt. Uses MINPACK routine + [`lmdif1`](https://github.com/devernay/cminpack/blob/d1f5f5a273862ca1bbcf58394e4ac060d9e22c76/lmdif1.c) + - `:lmdif`: Advanced Levenberg-Marquardt (more options available with `; kwargs...`). See + MINPACK routine [`lmdif`](https://github.com/devernay/cminpack/blob/d1f5f5a273862ca1bbcf58394e4ac060d9e22c76/lmdif.c) + for more information + - `:hybrd`: Advanced modified version of Powell's algorithm (more options available with + `; kwargs...`). See MINPACK routine + [`hybrd`](https://github.com/devernay/cminpack/blob/d1f5f5a273862ca1bbcf58394e4ac060d9e22c76/hybrd.c) + for more information + +If a Jacobian is supplied as part of the [`NonlinearFunction`](@ref nonlinearfunctions), +then the following methods are allowed: + + - `:hybr`: Advanced modified version of Powell's algorithm with user supplied Jacobian. + Additional arguments are available via `; kwargs...`. See MINPACK routine + [`hybrj`](https://github.com/devernay/cminpack/blob/d1f5f5a273862ca1bbcf58394e4ac060d9e22c76/hybrj.c) + for more information + - `:lm`: Advanced Levenberg-Marquardt with user supplied Jacobian. Additional arguments + are available via `;kwargs...`. See MINPACK routine + [`lmder`](https://github.com/devernay/cminpack/blob/d1f5f5a273862ca1bbcf58394e4ac060d9e22c76/lmder.c) + for more information + +The default choice of `:auto` selects `:hybr` for NonlinearProblem and `:lm` for +NonlinearLeastSquaresProblem. +""" +struct CMINPACK <: AbstractNonlinearAlgorithm + show_trace::Bool + tracing::Bool + method::Symbol +end + +function CMINPACK(; show_trace::Bool = false, tracing::Bool = false, method::Symbol = :auto) + if Base.get_extension(@__MODULE__, :NonlinearSolveMINPACKExt) === nothing + error("CMINPACK requires MINPACK.jl to be loaded") + end + + return CMINPACK(show_trace, tracing, method) +end + +""" + NLsolveJL(; method=:trust_region, autodiff=:central, store_trace=false, + extended_trace=false, linesearch=LineSearches.Static(), + linsolve=(x, A, b) -> copyto!(x, A\\b), factor = one(Float64), autoscale=true, + m=10, beta=one(Float64), show_trace=false) + +### Keyword Arguments + + - `method`: the choice of method for solving the nonlinear system. + - `autodiff`: the choice of method for generating the Jacobian. Defaults to `:central`, + i.e. central differencing via FiniteDiff.jl. The other choice is `:forward`. + - `show_trace`: should a trace of the optimization algorithm's state be shown on `STDOUT`? + - `extended_trace`: should additional algorithm internals be added to the state trace? + - `linesearch`: the line search method to be used within the solver method. The choices + are line search types from + [LineSearches.jl](https://github.com/JuliaNLSolvers/LineSearches.jl). + - `linsolve`: a function `linsolve(x, A, b)` that solves `Ax = b`. + - `factor`: determines the size of the initial trust region. This size is set to the + product of `factor` and the Euclidean norm of `u0` if nonzero, or else to `factor` itself.
+ - `autoscale`: if true, then the variables will be automatically rescaled. The scaling + factors are the norms of the Jacobian columns. + - `m`: the amount of history in the Anderson method. Naive "Picard"-style iteration can be + achieved by setting m=0, but that isn't advisable for contractions whose Lipschitz + constants are close to 1. If convergence fails, though, you may consider lowering it. + - `beta`: also known as DIIS or Pulay mixing, this method is based on the acceleration of + the fixed-point iteration `xₙ₊₁ = xₙ + beta*f(xₙ)`, where by default `beta = 1`. + - `store_trace`: should a trace of the optimization algorithm's state be stored? + +### Submethod Choice + +Choices for methods in `NLsolveJL`: + + - `:anderson`: Anderson-accelerated fixed-point iteration + - `:broyden`: Broyden's quasi-Newton method + - `:newton`: Classical Newton method with an optional line search + - `:trust_region`: Trust region Newton method (the default choice). + +For more information on these arguments, consult the +[NLsolve.jl documentation](https://github.com/JuliaNLSolvers/NLsolve.jl). +""" +@concrete struct NLsolveJL <: AbstractNonlinearAlgorithm + method::Symbol + autodiff::Symbol + store_trace::Bool + extended_trace::Bool + linesearch + linsolve + factor + autoscale::Bool + m::Int + beta + show_trace::Bool +end + +function NLsolveJL(; method = :trust_region, autodiff = :central, store_trace = false, + extended_trace = false, linesearch = LineSearches.Static(), + linsolve = (x, A, b) -> copyto!(x, A \ b), factor = 1.0, autoscale = true, m = 10, + beta = one(Float64), show_trace = false) + if Base.get_extension(@__MODULE__, :NonlinearSolveNLsolveExt) === nothing + error("NLsolveJL requires NLsolve.jl to be loaded") + end + + return NLsolveJL(method, autodiff, store_trace, extended_trace, linesearch, linsolve, + factor, autoscale, m, beta, show_trace) +end diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index 822b0ffc3..c7e99c912 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -47,10 +47,9 @@ function set_ad(alg::GaussNewton{CJ}, ad) where {CJ} end function GaussNewton(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, - linesearch = nothing, vjp_autodiff = nothing, adkwargs...) - ad = default_adargs_to_adtype(; adkwargs...) + linesearch = nothing, vjp_autodiff = nothing, autodiff = nothing) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) - return GaussNewton{_unwrap_val(concrete_jac)}(ad, linsolve, precs, linesearch, + return GaussNewton{_unwrap_val(concrete_jac)}(autodiff, linsolve, precs, linesearch, vjp_autodiff) end diff --git a/src/klement.jl b/src/klement.jl index ed6a3b172..a49a3eda9 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -1,5 +1,5 @@ """ - GeneralKlement(; max_resets = 100, linsolve = nothing, linesearch = nothing, + Klement(; max_resets = 100, linsolve = nothing, linesearch = nothing, precs = DEFAULT_PRECS, alpha = true, init_jacobian::Val = Val(:identity), autodiff = nothing) @@ -38,7 +38,7 @@ solves. It is recommended to use `Broyden` for most problems over this. `nothing` which means that a default is selected according to the problem specification! Valid choices are types from ADTypes.jl. (Used if `init_jacobian = Val(:true_jacobian)`) """ -@concrete struct GeneralKlement{IJ, CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} +@concrete struct Klement{IJ, CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} ad::AD max_resets::Int linsolve @@ -47,30 +47,30 @@ solves.
It is recommended to use `Broyden` for most problems over this. alpha end -function __alg_print_modifiers(alg::GeneralKlement{IJ}) where {IJ} +function __alg_print_modifiers(alg::Klement{IJ}) where {IJ} modifiers = String[] IJ !== :identity && push!(modifiers, "init_jacobian = Val(:$(IJ))") alg.alpha !== nothing && push!(modifiers, "alpha = $(alg.alpha)") return modifiers end -function set_ad(alg::GeneralKlement{IJ, CJ}, ad) where {IJ, CJ} - return GeneralKlement{IJ, CJ}(ad, alg.max_resets, alg.linsolve, alg.precs, +function set_ad(alg::Klement{IJ, CJ}, ad) where {IJ, CJ} + return Klement{IJ, CJ}(ad, alg.max_resets, alg.linsolve, alg.precs, alg.linesearch, alg.alpha) end -function GeneralKlement(; max_resets::Int = 100, linsolve = nothing, alpha = true, +function Klement(; max_resets::Int = 100, linsolve = nothing, alpha = true, linesearch = nothing, precs = DEFAULT_PRECS, init_jacobian::Val = Val(:identity), autodiff = nothing) IJ = _unwrap_val(init_jacobian) @assert IJ ∈ (:identity, :true_jacobian, :true_jacobian_diagonal) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) CJ = IJ !== :identity - return GeneralKlement{IJ, CJ}(autodiff, max_resets, linsolve, precs, linesearch, + return Klement{IJ, CJ}(autodiff, max_resets, linsolve, precs, linesearch, alpha) end -@concrete mutable struct GeneralKlementCache{iip, IJ} <: AbstractNonlinearSolveCache{iip} +@concrete mutable struct KlementCache{iip, IJ} <: AbstractNonlinearSolveCache{iip} f alg u @@ -104,7 +104,7 @@ end trace end -function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKlement{IJ}, +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::Klement{IJ}, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, internalnorm::F = DEFAULT_NORM, linsolve_kwargs = (;), kwargs...) where {uType, iip, F, IJ} @@ -155,14 +155,14 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme J_cache_2, Jdu_cache = nothing, nothing end - return GeneralKlementCache{iip, IJ}(f, alg, u, u_cache, fu, fu_cache, fu_cache_2, du, p, + return KlementCache{iip, IJ}(f, alg, u, u_cache, fu, fu_cache, fu_cache_2, du, p, uf, linsolve, J, J_cache, J_cache_2, Jdu, Jdu_cache, alpha, alg.alpha, 0, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, jac_cache, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) end -function perform_step!(cache::GeneralKlementCache{iip, IJ}) where {iip, IJ} +function perform_step!(cache::KlementCache{iip, IJ}) where {iip, IJ} @unpack linsolve, alg = cache T = eltype(cache.J) @@ -250,7 +250,7 @@ function perform_step!(cache::GeneralKlementCache{iip, IJ}) where {iip, IJ} return nothing end -function __reinit_internal!(cache::GeneralKlementCache; kwargs...) +function __reinit_internal!(cache::KlementCache; kwargs...) cache.alpha = __initial_alpha(cache.alpha, cache.alpha_initial, cache.u, cache.fu, cache.internalnorm) cache.J = __reinit_identity_jacobian!!(cache.J, cache.alpha) diff --git a/src/lbroyden.jl b/src/lbroyden.jl index c4c73e11e..f31be0d46 100644 --- a/src/lbroyden.jl +++ b/src/lbroyden.jl @@ -14,7 +14,7 @@ An implementation of `LimitedMemoryBroyden` with resetting and line search. - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), which means that no line search is performed. 
Algorithms from `LineSearches.jl` can be used here directly, and they will be converted to the correct `LineSearch`. It is
-    recommended to use [LiFukushimaLineSearchCache](@ref) -- a derivative free linesearch
+    recommended to use [`LiFukushimaLineSearchCache`](@ref) -- a derivative-free line search
     specifically designed for Broyden's method.
 """
 @concrete struct LimitedMemoryBroyden{threshold} <: AbstractNewtonAlgorithm{false, Nothing}
@@ -76,7 +76,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemory
     if u0 isa Number || length(u0) ≤ η
         # If u is a number or very small problem then we simply use Broyden
         return SciMLBase.__init(prob,
-            GeneralBroyden(; alg.max_resets, alg.reset_tolerance, alg.linesearch), args...;
+            Broyden(; alg.max_resets, alg.reset_tolerance, alg.linesearch), args...;
             alias_u0, maxiters, abstol, internalnorm, kwargs...)
     end
     u = __maybe_unaliased(u0, alias_u0)
diff --git a/src/levenberg.jl b/src/levenberg.jl
index 160406f66..305613df2 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -102,10 +102,9 @@ function LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing,
     precs = DEFAULT_PRECS, damping_initial::Real = 1.0, α_geodesic::Real = 0.75,
     damping_increase_factor::Real = 2.0, damping_decrease_factor::Real = 3.0,
     finite_diff_step_geodesic::Real = 0.1, b_uphill::Real = 1.0,
-    min_damping_D::Real = 1e-8, adkwargs...)
-    ad = default_adargs_to_adtype(; adkwargs...)
+    min_damping_D::Real = 1e-8, autodiff = nothing)
     _concrete_jac = ifelse(concrete_jac === nothing, true, concrete_jac)
-    return LevenbergMarquardt{_unwrap_val(_concrete_jac)}(ad, linsolve, precs,
+    return LevenbergMarquardt{_unwrap_val(_concrete_jac)}(autodiff, linsolve, precs,
         damping_initial, damping_increase_factor, damping_decrease_factor,
         finite_diff_step_geodesic, α_geodesic, b_uphill, min_damping_D)
 end
diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
index 1416cc4b8..7045e38cd 100644
--- a/src/pseudotransient.jl
+++ b/src/pseudotransient.jl
@@ -46,9 +46,9 @@ function set_ad(alg::PseudoTransient{CJ}, ad) where {CJ}
 end
 
 function PseudoTransient(; concrete_jac = nothing, linsolve = nothing,
-    precs = DEFAULT_PRECS, alpha_initial = 1e-3, adkwargs...)
-    ad = default_adargs_to_adtype(; adkwargs...)
-    return PseudoTransient{_unwrap_val(concrete_jac)}(ad, linsolve, precs, alpha_initial)
+    precs = DEFAULT_PRECS, alpha_initial = 1e-3, autodiff = nothing)
+    return PseudoTransient{_unwrap_val(concrete_jac)}(autodiff, linsolve, precs,
+        alpha_initial)
 end
 
 @concrete mutable struct PseudoTransientCache{iip} <: AbstractNonlinearSolveCache{iip}
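These constructor changes all follow the same pattern: the old `adkwargs...` keywords
(`chunk_size`, `standardtag`, `diff_type`, `autodiff::Union{Val, Bool}`) are replaced by a
single `autodiff` keyword that takes an ADTypes.jl choice directly. A hedged before/after
sketch (the problem is invented; `AutoForwardDiff` is assumed to be available via
ADTypes.jl, which NonlinearSolve re-exports):

```julia
using NonlinearSolve

f(u, p) = u .* u .- p
prob = NonlinearProblem{false}(f, [1.0, 1.0], 2.0)

# v2 (deprecated): solve(prob, NewtonRaphson(; autodiff = Val(true), chunk_size = Val{2}()))
# v3: pass the ADTypes choice directly
sol = solve(prob, NewtonRaphson(; autodiff = AutoForwardDiff()))
```

diff --git a/src/raphson.jl b/src/raphson.jl
index 9ba6319aa..0fa918232 100644
--- a/src/raphson.jl
+++ b/src/raphson.jl
@@ -42,10 +42,9 @@ function set_ad(alg::NewtonRaphson{CJ}, ad) where {CJ}
 end
 
 function NewtonRaphson(; concrete_jac = nothing, linsolve = nothing, linesearch = nothing,
-    precs = DEFAULT_PRECS, adkwargs...)
-    ad = default_adargs_to_adtype(; adkwargs...)
+    precs = DEFAULT_PRECS, autodiff = nothing)
     linesearch = linesearch isa LineSearch ?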
linesearch : LineSearch(; method = linesearch) - return NewtonRaphson{_unwrap_val(concrete_jac)}(ad, linsolve, precs, linesearch) + return NewtonRaphson{_unwrap_val(concrete_jac)}(autodiff, linsolve, precs, linesearch) end @concrete mutable struct NewtonRaphsonCache{iip} <: AbstractNonlinearSolveCache{iip} diff --git a/src/trustRegion.jl b/src/trustRegion.jl index b63be58cc..e91bf8461 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -194,11 +194,11 @@ function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAU step_threshold::Real = 1 // 10000, shrink_threshold::Real = 1 // 4, expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4, expand_factor::Real = 2 // 1, max_shrink_times::Int = 32, vjp_autodiff = nothing, - adkwargs...) - ad = default_adargs_to_adtype(; adkwargs...) - return TrustRegion{_unwrap_val(concrete_jac)}(ad, linsolve, precs, radius_update_scheme, - max_trust_radius, initial_trust_radius, step_threshold, shrink_threshold, - expand_threshold, shrink_factor, expand_factor, max_shrink_times, vjp_autodiff) + autodiff = nothing) + return TrustRegion{_unwrap_val(concrete_jac)}(autodiff, linsolve, precs, + radius_update_scheme, max_trust_radius, initial_trust_radius, step_threshold, + shrink_threshold, expand_threshold, shrink_factor, expand_factor, max_shrink_times, + vjp_autodiff) end @concrete mutable struct TrustRegionCache{iip} <: AbstractNonlinearSolveCache{iip} diff --git a/src/utils.jl b/src/utils.jl index a781a954d..d312c21c9 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -25,45 +25,6 @@ function ForwardDiff.checktag(::Type{<:ForwardDiff.Tag{<:NonlinearSolveTag, <:T} return true end -""" - default_adargs_to_adtype(; chunk_size = Val{0}(), autodiff = Val{true}(), - standardtag = Val{true}(), diff_type = Val{:forward}) - -Construct the AD type from the arguments. This is mostly needed for compatibility with older -code. - -!!! warning - - `chunk_size`, `standardtag`, `diff_type`, and `autodiff::Union{Val, Bool}` are - deprecated and will be removed in v3. Update your code to directly specify - `autodiff=`. -""" -function default_adargs_to_adtype(; chunk_size = missing, autodiff = nothing, - standardtag = missing, diff_type = missing) - # If using the new API short circuit - autodiff === nothing && return nothing - autodiff isa ADTypes.AbstractADType && return autodiff - - # Deprecate the old version - if chunk_size !== missing || standardtag !== missing || diff_type !== missing || - autodiff !== missing - Base.depwarn("`chunk_size`, `standardtag`, `diff_type`, \ - `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed \ - in v3. Update your code to directly specify autodiff=", - :default_adargs_to_adtype) - end - chunk_size === missing && (chunk_size = Val{0}()) - standardtag === missing && (standardtag = Val{true}()) - diff_type === missing && (diff_type = Val{:forward}()) - autodiff === missing && (autodiff = Val{true}()) - - ad = _unwrap_val(autodiff) - # We don't really know the typeof the input yet, so we can't use the correct tag! 
- ad && - return AutoForwardDiff{_unwrap_val(chunk_size), NonlinearSolveTag}(NonlinearSolveTag()) - return AutoFiniteDiff(; fdtype = diff_type) -end - """ value_derivative(f, x) diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index fb4a37460..fbddaf56e 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -89,14 +89,12 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end -@testset "GeneralBroyden 23 Test Problems" begin - alg_ops = (GeneralBroyden(), - GeneralBroyden(; init_jacobian = Val(:true_jacobian)), - GeneralBroyden(; update_rule = Val(:bad_broyden)), - GeneralBroyden(; init_jacobian = Val(:true_jacobian), - update_rule = Val(:bad_broyden)), - GeneralBroyden(; update_rule = Val(:diagonal)), - GeneralBroyden(; init_jacobian = Val(:true_jacobian), update_rule = Val(:diagonal))) +@testset "Broyden 23 Test Problems" begin + alg_ops = (Broyden(), Broyden(; init_jacobian = Val(:true_jacobian)), + Broyden(; update_rule = Val(:bad_broyden)), + Broyden(; init_jacobian = Val(:true_jacobian), update_rule = Val(:bad_broyden)), + Broyden(; update_rule = Val(:diagonal)), + Broyden(; init_jacobian = Val(:true_jacobian), update_rule = Val(:diagonal))) broken_tests = Dict(alg => Int[] for alg in alg_ops) broken_tests[alg_ops[1]] = [1, 5, 6, 11] @@ -117,10 +115,9 @@ end test_on_library(problems, dicts, alg_ops, broken_tests; skip_tests) end -@testset "GeneralKlement 23 Test Problems" begin - alg_ops = (GeneralKlement(), - GeneralKlement(; init_jacobian = Val(:true_jacobian)), - GeneralKlement(; init_jacobian = Val(:true_jacobian_diagonal))) +@testset "Klement 23 Test Problems" begin + alg_ops = (Klement(), Klement(; init_jacobian = Val(:true_jacobian)), + Klement(; init_jacobian = Val(:true_jacobian_diagonal))) broken_tests = Dict(alg => Int[] for alg in alg_ops) broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 11, 22] diff --git a/test/basictests.jl b/test/basictests.jl index 9aad5d1e8..7acb47cae 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -124,9 +124,8 @@ const TERMINATION_CONDITIONS = [ @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) - @testset "ADType: $(autodiff) u0: $(_nameof(u0))" for autodiff in (false, true, - AutoSparseForwardDiff(), AutoSparseFiniteDiff(), AutoZygote(), - AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]) + @testset "ADType: $(autodiff) u0: $(_nameof(u0))" for autodiff in (AutoSparseForwardDiff(), + AutoSparseFiniteDiff(), AutoZygote(), AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]) probN = NonlinearProblem(quadratic_f, u0, 2.0) @test all(solve(probN, NewtonRaphson(; autodiff)).u .≈ sqrt(2.0)) end @@ -244,9 +243,8 @@ end @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) - @testset "ADType: $(autodiff) u0: $(_nameof(u0)) radius_update_scheme: $(radius_update_scheme)" for autodiff in (false, - true, AutoSparseForwardDiff(), AutoSparseFiniteDiff(), AutoZygote(), - AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]), + @testset "ADType: $(autodiff) u0: $(_nameof(u0)) radius_update_scheme: $(radius_update_scheme)" for autodiff in (AutoSparseForwardDiff(), + AutoSparseFiniteDiff(), AutoZygote(), AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]), radius_update_scheme in radius_update_schemes probN = NonlinearProblem(quadratic_f, u0, 2.0) @@ -380,9 +378,8 @@ end @test 
ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u], p) ≈ ForwardDiff.jacobian(t, p) - @testset "ADType: $(autodiff) u0: $(_nameof(u0))" for autodiff in (false, true, - AutoSparseForwardDiff(), AutoSparseFiniteDiff(), AutoZygote(), - AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]) + @testset "ADType: $(autodiff) u0: $(_nameof(u0))" for autodiff in (AutoSparseForwardDiff(), + AutoSparseFiniteDiff(), AutoZygote(), AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]) probN = NonlinearProblem(quadratic_f, u0, 2.0) @test all(solve(probN, LevenbergMarquardt(; autodiff); abstol = 1e-9, reltol = 1e-9).u .≈ sqrt(2.0)) @@ -664,9 +661,8 @@ end @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) - @testset "ADType: $(autodiff) u0: $(_nameof(u0))" for autodiff in (false, true, - AutoSparseForwardDiff(), AutoSparseFiniteDiff(), AutoZygote(), - AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]) + @testset "ADType: $(autodiff) u0: $(_nameof(u0))" for autodiff in (AutoSparseForwardDiff(), + AutoSparseFiniteDiff(), AutoZygote(), AutoSparseZygote(), AutoSparseEnzyme()), u0 in (1.0, [1.0, 1.0]) probN = NonlinearProblem(quadratic_f, u0, 2.0) @test all(solve(probN, PseudoTransient(; alpha_initial = 10.0, autodiff)).u .≈ sqrt(2.0)) @@ -689,20 +685,20 @@ end end end -# --- GeneralBroyden tests --- +# --- Broyden tests --- -@testset "GeneralBroyden" begin +@testset "Broyden" begin function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = nothing, init_jacobian = Val(:identity), update_rule = Val(:good_broyden)) prob = NonlinearProblem{false}(f, u0, p) - return solve(prob, GeneralBroyden(; linesearch, init_jacobian, update_rule); + return solve(prob, Broyden(; linesearch, init_jacobian, update_rule); abstol = 1e-9) end function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = nothing, init_jacobian = Val(:identity), update_rule = Val(:good_broyden)) prob = NonlinearProblem{true}(f, u0, p) - return solve(prob, GeneralBroyden(; linesearch, init_jacobian, update_rule); + return solve(prob, Broyden(; linesearch, init_jacobian, update_rule); abstol = 1e-9) end @@ -723,7 +719,7 @@ end @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0), - GeneralBroyden(; linesearch, update_rule, init_jacobian), abstol = 1e-9) + Broyden(; linesearch, update_rule, init_jacobian), abstol = 1e-9) @test (@ballocated solve!($cache)) < 200 end @@ -735,7 +731,7 @@ end @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0), - GeneralBroyden(; linesearch, update_rule, init_jacobian), abstol = 1e-9) + Broyden(; linesearch, update_rule, init_jacobian), abstol = 1e-9) @test (@ballocated solve!($cache)) ≤ 64 end end @@ -773,7 +769,7 @@ end # Iterator interface function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip} probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin]) - cache = init(probN, GeneralBroyden(); maxiters = 100, abstol = 1e-10) + cache = init(probN, Broyden(); maxiters = 100, abstol = 1e-10) sols = zeros(length(p_range)) for (i, p) in enumerate(p_range) reinit!(cache, iip ? 
[cache.u[1]] : cache.u; p = p) @@ -790,23 +786,23 @@ end u0 in (1.0, [1.0, 1.0]) probN = NonlinearProblem(quadratic_f, u0, 2.0) - @test all(solve(probN, GeneralBroyden(); termination_condition).u .≈ sqrt(2.0)) + @test all(solve(probN, Broyden(); termination_condition).u .≈ sqrt(2.0)) end end -# --- GeneralKlement tests --- +# --- Klement tests --- -@testset "GeneralKlement" begin +@testset "Klement" begin function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = nothing, init_jacobian = Val(:identity)) prob = NonlinearProblem{false}(f, u0, p) - return solve(prob, GeneralKlement(; linesearch, init_jacobian), abstol = 1e-9) + return solve(prob, Klement(; linesearch, init_jacobian), abstol = 1e-9) end function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = nothing, init_jacobian = Val(:identity)) prob = NonlinearProblem{true}(f, u0, p) - return solve(prob, GeneralKlement(; linesearch, init_jacobian), abstol = 1e-9) + return solve(prob, Klement(; linesearch, init_jacobian), abstol = 1e-9) end @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad)) Init Jacobian: $(init_jacobian)" for lsmethod in (Static(), @@ -824,7 +820,7 @@ end @test all(abs.(sol.u .* sol.u .- 2) .< 3e-9) cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0), - GeneralKlement(; linesearch), abstol = 1e-9) + Klement(; linesearch), abstol = 1e-9) @test (@ballocated solve!($cache)) < 200 end @@ -835,7 +831,7 @@ end @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0), - GeneralKlement(; linesearch), abstol = 1e-9) + Klement(; linesearch), abstol = 1e-9) @test (@ballocated solve!($cache)) ≤ 64 end end @@ -873,7 +869,7 @@ end # Iterator interface function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip} probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin]) - cache = init(probN, GeneralKlement(); maxiters = 100, abstol = 1e-10) + cache = init(probN, Klement(); maxiters = 100, abstol = 1e-10) sols = zeros(length(p_range)) for (i, p) in enumerate(p_range) reinit!(cache, iip ? 
[cache.u[1]] : cache.u; p = p) @@ -890,7 +886,7 @@ end u0 in (1.0, [1.0, 1.0]) probN = NonlinearProblem(quadratic_f, u0, 2.0) - @test all(solve(probN, GeneralKlement(); termination_condition).u .≈ sqrt(2.0)) + @test all(solve(probN, Klement(); termination_condition).u .≈ sqrt(2.0)) end end diff --git a/test/gpu.jl b/test/gpu.jl index c314f4d76..8459a2a2a 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -11,7 +11,7 @@ linear_f(du, u, p) = (du .= A * u .+ b) prob = NonlinearProblem(linear_f, u0) for alg in (NewtonRaphson(), LevenbergMarquardt(; linsolve = QRFactorization()), - PseudoTransient(; alpha_initial = 1.0f0), GeneralKlement(), GeneralBroyden(), + PseudoTransient(; alpha_initial = 1.0f0), Klement(), Broyden(), LimitedMemoryBroyden(), TrustRegion()) @test_nowarn sol = solve(prob, alg; abstol = 1.0f-8, reltol = 1.0f-8) end @@ -21,7 +21,7 @@ linear_f(u, p) = A * u .+ b prob = NonlinearProblem{false}(linear_f, u0) for alg in (NewtonRaphson(), LevenbergMarquardt(; linsolve = QRFactorization()), - PseudoTransient(; alpha_initial = 1.0f0), GeneralKlement(), GeneralBroyden(), + PseudoTransient(; alpha_initial = 1.0f0), Klement(), Broyden(), LimitedMemoryBroyden(), TrustRegion()) @test_nowarn sol = solve(prob, alg; abstol = 1.0f-8, reltol = 1.0f-8) end diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl index c9579d9cf..978517525 100644 --- a/test/matrix_resizing.jl +++ b/test/matrix_resizing.jl @@ -7,7 +7,7 @@ vecprob = NonlinearProblem(ff, vec(u0), p) prob = NonlinearProblem(ff, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement(), + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), Broyden(), Klement(), LimitedMemoryBroyden(; threshold = 2)) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end @@ -19,7 +19,7 @@ vecprob = NonlinearProblem(fiip, vec(u0), p) prob = NonlinearProblem(fiip, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement(), + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), Broyden(), Klement(), LimitedMemoryBroyden(; threshold = 2)) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end diff --git a/test/minpack.jl b/test/minpack.jl new file mode 100644 index 000000000..46cc6be6e --- /dev/null +++ b/test/minpack.jl @@ -0,0 +1,35 @@ +using NonlinearSolve, MINPACK, Test + +function f_iip(du, u, p) + du[1] = 2 - 2u[1] + du[2] = u[1] - 4u[2] +end +u0 = zeros(2) +prob_iip = NonlinearProblem{true}(f_iip, u0) +abstol = 1e-8 + +for alg in [CMINPACK()] + local sol + sol = solve(prob_iip, alg) + @test sol.retcode == ReturnCode.Success + p = nothing + + du = zeros(2) + f_iip(du, sol.u, nothing) + @test maximum(du) < 1e-6 +end + +# OOP Tests +f_oop(u, p) = [2 - 2u[1], u[1] - 4u[2]] +u0 = zeros(2) +prob_oop = NonlinearProblem{false}(f_oop, u0) + +for alg in [CMINPACK()] + local sol + sol = solve(prob_oop, alg) + @test sol.retcode == ReturnCode.Success + + du = zeros(2) + du = f_oop(sol.u, nothing) + @test maximum(du) < 1e-6 +end diff --git a/test/nlsolve.jl b/test/nlsolve.jl new file mode 100644 index 000000000..ebda5d272 --- /dev/null +++ b/test/nlsolve.jl @@ -0,0 +1,138 @@ +using NonlinearSolve, NLsolve, LinearAlgebra, Test + +# IIP Tests +function f_iip(du, u, p, t) + du[1] = 2 - 2u[1] + du[2] = u[1] - 4u[2] +end +u0 = zeros(2) +prob_iip = SteadyStateProblem(f_iip, u0) +abstol = 1e-8 + +for alg in 
[NLsolveJL()] + sol = solve(prob_iip, alg) + @test sol.retcode == ReturnCode.Success + p = nothing + + du = zeros(2) + f_iip(du, sol.u, nothing, 0) + @test maximum(du) < 1e-6 +end + +# OOP Tests +f_oop(u, p, t) = [2 - 2u[1], u[1] - 4u[2]] +u0 = zeros(2) +prob_oop = SteadyStateProblem(f_oop, u0) + +for alg in [NLsolveJL()] + sol = solve(prob_oop, alg) + @test sol.retcode == ReturnCode.Success + # test the solver is doing reasonable things for linear solve + # and that the stats are working properly + @test 1 <= sol.stats.nf < 10 + + du = zeros(2) + du = f_oop(sol.u, nothing, 0) + @test maximum(du) < 1e-6 +end + +# NonlinearProblem Tests + +function f_iip(du, u, p) + du[1] = 2 - 2u[1] + du[2] = u[1] - 4u[2] +end +u0 = zeros(2) +prob_iip = NonlinearProblem{true}(f_iip, u0) +abstol = 1e-8 +for alg in [NLsolveJL()] + local sol + sol = solve(prob_iip, alg) + @test sol.retcode == ReturnCode.Success + p = nothing + + du = zeros(2) + f_iip(du, sol.u, nothing) + @test maximum(du) < 1e-6 +end + +# OOP Tests +f_oop(u, p) = [2 - 2u[1], u[1] - 4u[2]] +u0 = zeros(2) +prob_oop = NonlinearProblem{false}(f_oop, u0) +for alg in [NLsolveJL()] + local sol + sol = solve(prob_oop, alg) + @test sol.retcode == ReturnCode.Success + + du = zeros(2) + du = f_oop(sol.u, nothing) + @test maximum(du) < 1e-6 +end + +# tolerance tests +f_tol(u, p) = u^2 - 2 +prob_tol = NonlinearProblem(f_tol, 1.0) +for tol in [1e-1, 1e-3, 1e-6, 1e-10, 1e-15] + sol = solve(prob_tol, NLsolveJL(), abstol = tol) + @test abs(sol.u[1] - sqrt(2)) < tol +end + +# Test the finite differencing technique +function f!(fvec, x, p) + fvec[1] = (x[1] + 3) * (x[2]^3 - 7) + 18 + fvec[2] = sin(x[2] * exp(x[1]) - 1) +end + +prob = NonlinearProblem{true}(f!, [0.1; 1.2]) +sol = solve(prob, NLsolveJL(autodiff = :central)) + +du = zeros(2) +f!(du, sol.u, nothing) +@test maximum(du) < 1e-6 + +# Test the autodiff technique +function f!(fvec, x, p) + fvec[1] = (x[1] + 3) * (x[2]^3 - 7) + 18 + fvec[2] = sin(x[2] * exp(x[1]) - 1) +end + +prob = NonlinearProblem{true}(f!, [0.1; 1.2]) +sol = solve(prob, NLsolveJL(autodiff = :forward)) + +du = zeros(2) +f!(du, sol.u, nothing) +@test maximum(du) < 1e-6 + +function problem(x, A) + return x .^ 2 - A +end + +function problemJacobian(x, A) + return diagm(2 .* x) +end + +function f!(F, u, p) + F[1:152] = problem(u, p) +end + +function j!(J, u, p) + J[1:152, 1:152] = problemJacobian(u, p) +end + +f = NonlinearFunction(f!) + +init = ones(152); +A = ones(152); +A[6] = 0.8 + +f = NonlinearFunction(f!, jac = j!) + +p = A + +ProbN = NonlinearProblem(f, init, p) +sol = solve(ProbN, NLsolveJL(), reltol = 1e-8, abstol = 1e-8) + +init = ones(Complex{Float64}, 152); +ProbN = NonlinearProblem(f, init, p) +sol = solve(ProbN, NLsolveJL(), reltol = 1e-8, abstol = 1e-8) diff --git a/test/nonlinear_least_squares.jl b/test/nonlinear_least_squares.jl index 7729f74d1..f4b8233e0 100644 --- a/test/nonlinear_least_squares.jl +++ b/test/nonlinear_least_squares.jl @@ -1,6 +1,6 @@ using NonlinearSolve, LinearSolve, LinearAlgebra, Test, StableRNGs, Random, ForwardDiff, Zygote -import FastLevenbergMarquardt, LeastSquaresOptim +import FastLevenbergMarquardt, LeastSquaresOptim, MINPACK true_function(x, θ) = @. θ[1] * exp(θ[2] * x) * cos(θ[3] * x + θ[4]) true_function(y, x, θ) = (@. 
y = θ[1] * exp(θ[2] * x) * cos(θ[3] * x + θ[4])) @@ -89,12 +89,38 @@ function jac!(J, θ, p) return J end -prob = NonlinearLeastSquaresProblem(NonlinearFunction(loss_function; - resid_prototype = zero(y_target), jac = jac!), θ_init, x) +jac(θ, p) = ForwardDiff.jacobian(θ -> loss_function(θ, p), θ) -solvers = [FastLevenbergMarquardtJL(:cholesky), FastLevenbergMarquardtJL(:qr)] +probs = [ + NonlinearLeastSquaresProblem(NonlinearFunction{true}(loss_function; + resid_prototype = zero(y_target), jac = jac!), θ_init, x), + NonlinearLeastSquaresProblem(NonlinearFunction{false}(loss_function; + resid_prototype = zero(y_target), jac = jac), θ_init, x), + NonlinearLeastSquaresProblem(NonlinearFunction{false}(loss_function; jac), θ_init, x), +] + +solvers = Any[FastLevenbergMarquardtJL(linsolve) for linsolve in (:cholesky, :qr)] +push!(solvers, CMINPACK()) + +for solver in solvers, prob in probs + @time sol = solve(prob, solver; maxiters = 10000, abstol = 1e-8) + @test norm(sol.resid) < 1e-6 +end + +probs = [ + NonlinearLeastSquaresProblem(NonlinearFunction{true}(loss_function; + resid_prototype = zero(y_target)), θ_init, x), + NonlinearLeastSquaresProblem(NonlinearFunction{false}(loss_function; + resid_prototype = zero(y_target)), θ_init, x), + NonlinearLeastSquaresProblem(NonlinearFunction{false}(loss_function), θ_init, x), +] + +solvers = vec(Any[FastLevenbergMarquardtJL(linsolve; autodiff) + for linsolve in (:cholesky, :qr), +autodiff in (nothing, AutoForwardDiff(), AutoFiniteDiff())]) +append!(solvers, [CMINPACK(; method) for method in (:auto, :lm, :lmdif)]) -for solver in solvers +for solver in solvers, prob in probs @time sol = solve(prob, solver; maxiters = 10000, abstol = 1e-8) @test norm(sol.resid) < 1e-6 end diff --git a/test/polyalgs.jl b/test/polyalgs.jl index e56bb5353..9a0409671 100644 --- a/test/polyalgs.jl +++ b/test/polyalgs.jl @@ -4,7 +4,7 @@ f(u, p) = u .* u .- 2 u0 = [1.0, 1.0] probN = NonlinearProblem{false}(f, u0) -custom_polyalg = NonlinearSolvePolyAlgorithm((GeneralBroyden(), LimitedMemoryBroyden())) +custom_polyalg = NonlinearSolvePolyAlgorithm((Broyden(), LimitedMemoryBroyden())) # Uses the `__solve` function @time solver = solve(probN; abstol = 1e-9) diff --git a/test/runtests.jl b/test/runtests.jl index 465f9d7b1..855a793fc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,12 +14,14 @@ end @time begin if GROUP == "All" || GROUP == "Core" @time @safetestset "Quality Assurance" include("qa.jl") - @time @safetestset "Nonlinear Least Squares" include("nonlinear_least_squares.jl") @time @safetestset "Basic Tests + Some AD" include("basictests.jl") @time @safetestset "Sparsity Tests" include("sparse.jl") @time @safetestset "Polyalgs" include("polyalgs.jl") @time @safetestset "Matrix Resizing" include("matrix_resizing.jl") @time @safetestset "Infeasible Problems" include("infeasible.jl") + @time @safetestset "Nonlinear Least Squares" include("nonlinear_least_squares.jl") + @time @safetestset "MINPACK" include("minpack.jl") + @time @safetestset "NLsolve" include("nlsolve.jl") end if GROUP == "All" || GROUP == "23TestProblems"
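The test updates above also track the algorithm renames in this release (`GeneralBroyden`
to `Broyden`, `GeneralKlement` to `Klement`). A minimal sketch of the renamed API (a hedged
illustration with an invented problem, not part of the diff):

```julia
using NonlinearSolve

f(u, p) = u .* u .- p
prob = NonlinearProblem{false}(f, [1.0, 1.0], 2.0)

# v3 names; GeneralBroyden()/GeneralKlement() were the v2 spellings
sol_broyden = solve(prob, Broyden())
sol_klement = solve(prob, Klement())
```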