diff --git a/src/bicgstab.jl b/src/bicgstab.jl index 54cc02ea9..4ebba1171 100644 --- a/src/bicgstab.jl +++ b/src/bicgstab.jl @@ -149,23 +149,23 @@ kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) else - @kcopy!(n, r₀, b) # r₀ ← b + kcopy!(n, r₀, b) # r₀ ← b end - @kfill!(x, zero(FC)) # x₀ - @kfill!(s, zero(FC)) # s₀ - @kfill!(v, zero(FC)) # v₀ + kfill!(x, zero(FC)) # x₀ + kfill!(s, zero(FC)) # s₀ + kfill!(v, zero(FC)) # v₀ MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ - @kcopy!(n, p, r) # p₁ + kcopy!(n, p, r) # p₁ - α = one(FC) # α₀ - ω = one(FC) # ω₀ - ρ = one(FC) # ρ₀ + α = one(FC) # α₀ + ω = one(FC) # ω₀ + ρ = one(FC) # ρ₀ # Compute residual norm ‖r₀‖₂. - rNorm = @knrm2(n, r) + rNorm = knorm(n, r) history && push!(rNorms, rNorm) if rNorm == 0 stats.niter = 0 @@ -183,7 +183,7 @@ kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %5s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|", "timer") kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e %.2fs\n", iter, rNorm, abs(α), abs(ω), ktimer(start_time)) - next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩ + next_ρ = kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩ if next_ρ == 0 stats.niter = 0 stats.solved, stats.inconsistent = false, false @@ -206,27 +206,27 @@ kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, iter = iter + 1 ρ = next_ρ - NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ - mul!(q, A, y) # qₖ = Ayₖ - mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ - α = ρ / @kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩ - @kcopy!(n, s, r) # sₖ = rₖ₋₁ - @kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ - @kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ - NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ - mul!(d, A, z) # dₖ = Azₖ - MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ - ω = @kdot(n, t, s) / @kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩ - @kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ - @kcopy!(n, r, s) # rₖ = sₖ - @kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ - next_ρ = @kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩ - β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ) - @kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ - @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ + NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ + mul!(q, A, y) # qₖ = Ayₖ + mulorldiv!(v, M, q, ldiv) # vₖ = M⁻¹qₖ + α = ρ / kdot(n, c, v) # αₖ = ⟨r̅₀,rₖ₋₁⟩ / ⟨r̅₀,vₖ⟩ + kcopy!(n, s, r) # sₖ = rₖ₋₁ + kaxpy!(n, -α, v, s) # sₖ = sₖ - αₖvₖ + kaxpy!(n, α, y, x) # xₐᵤₓ = xₖ₋₁ + αₖyₖ + NisI || mulorldiv!(z, N, s, ldiv) # zₖ = N⁻¹sₖ + mul!(d, A, z) # dₖ = Azₖ + MisI || mulorldiv!(t, M, d, ldiv) # tₖ = M⁻¹dₖ + ω = kdot(n, t, s) / kdot(n, t, t) # ⟨tₖ,sₖ⟩ / ⟨tₖ,tₖ⟩ + kaxpy!(n, ω, z, x) # xₖ = xₐᵤₓ + ωₖzₖ + kcopy!(n, r, s) # rₖ = sₖ + kaxpy!(n, -ω, t, r) # rₖ = rₖ - ωₖtₖ + next_ρ = kdot(n, c, r) # ρₖ₊₁ = ⟨r̅₀,rₖ⟩ + β = (next_ρ / ρ) * (α / ω) # βₖ₊₁ = (ρₖ₊₁ / ρₖ) * (αₖ / ωₖ) + kaxpy!(n, -ω, v, p) # pₐᵤₓ = pₖ - ωₖvₖ + kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ₊₁pₐᵤₓ # Compute residual norm ‖rₖ‖₂. - rNorm = @knrm2(n, r) + rNorm = knorm(n, r) history && push!(rNorms, rNorm) # Stopping conditions that do not depend on user input. @@ -253,7 +253,7 @@ kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/bilq.jl b/src/bilq.jl index a4e41c420..68fde4489 100644 --- a/src/bilq.jl +++ b/src/bilq.jl @@ -148,7 +148,7 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) end if !MisI mulorldiv!(solver.t, M, r₀, ldiv) @@ -156,8 +156,8 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time end # Initial solution x₀ and residual norm ‖r₀‖. - @kfill!(x, zero(FC)) - bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ + kfill!(x, zero(FC)) + bNorm = knorm(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ history && push!(rNorms, bNorm) if bNorm == 0 @@ -174,7 +174,7 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time itmax == 0 && (itmax = 2*n) # Initialize the Lanczos biorthogonalization process. - cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + cᴴb = kdot(n, c, r₀) # ⟨c,r₀⟩ if cᴴb == 0 stats.niter = 0 stats.solved = false @@ -191,13 +191,13 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) - @kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 + kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - @kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ + kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations @@ -230,17 +230,17 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time mul!(s, Aᴴ, Mᴴuₖ) NisI || mulorldiv!(p, Nᴴ, s, ldiv) - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ + kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ + αₖ = kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) - γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + pᴴq = kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ # Update the LQ factorization of Tₖ = L̅ₖQₖ. # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] @@ -301,22 +301,22 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time if iter ≥ 2 # Compute solution xₖ. # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) end # Compute d̅ₖ. if iter == 1 # d̅₁ = v₁ - @kcopy!(n, d̅, vₖ) # d̅ ← vₖ + kcopy!(n, d̅, vₖ) # d̅ ← vₖ else # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) end # Compute vₖ₊₁ and uₖ₊₁. - @kcopy!(n, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ - @kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ + kcopy!(n, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ + kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ if pᴴq ≠ 0 vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @@ -324,8 +324,8 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time end # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᴴvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) - norm_vₖ₊₁ = @knrm2(n, vₖ) + vₖᴴvₖ₊₁ = kdot(n, vₖ₋₁, vₖ) + norm_vₖ₊₁ = knorm(n, vₖ) # Compute BiLQ residual norm # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) @@ -370,7 +370,7 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time # Compute BICG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + kaxpy!(n, ζbarₖ, d̅, x) end # Termination status @@ -386,7 +386,7 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time copyto!(solver.s, x) mulorldiv!(x, N, solver.s, ldiv) end - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/bilqr.jl b/src/bilqr.jl index 377b4f69e..35df0b98e 100644 --- a/src/bilqr.jl +++ b/src/bilqr.jl @@ -137,18 +137,18 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) mul!(s₀, Aᴴ, Δy) - @kaxpby!(n, one(FC), c, -one(FC), s₀) + kaxpby!(n, one(FC), c, -one(FC), s₀) end # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖. - @kfill!(x, zero(FC)) # x₀ - bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖ + kfill!(x, zero(FC)) # x₀ + bNorm = knorm(n, r₀) # rNorm = ‖r₀‖ # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖. - @kfill!(t, zero(FC)) # t₀ - cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ + kfill!(t, zero(FC)) # t₀ + cNorm = knorm(n, s₀) # sNorm = ‖s₀‖ iter = 0 itmax == 0 && (itmax = 2*n) @@ -161,7 +161,7 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time)) # Initialize the Lanczos biorthogonalization process. - cᴴb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩ + cᴴb = kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩ if cᴴb == 0 stats.niter = 0 stats.solved_primal = false @@ -175,21 +175,21 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi # Set up workspace. βₖ = √(abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) - @kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 + kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - @kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ + kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁ norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - @kfill!(wₖ₋₃, zero(FC)) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ - @kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ + kfill!(wₖ₋₃, zero(FC)) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ + kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ τₖ = zero(T) # τₖ is used for the dual residual norm estimate # Stopping criterion. @@ -216,17 +216,17 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ + kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ + αₖ = kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) - γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + pᴴq = kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ # Update the LQ factorization of Tₖ = L̅ₖQₖ. # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] @@ -288,22 +288,22 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi if iter ≥ 2 # Compute solution xₖ. # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) + kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + kaxpy!(n, ζₖ₋₁ * sₖ, vₖ, x) end # Compute d̅ₖ. if iter == 1 # d̅₁ = v₁ - @kcopy!(n, d̅, vₖ) # d̅ ← vₖ + kcopy!(n, d̅, vₖ) # d̅ ← vₖ else # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ - @kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) + kaxpby!(n, -cₖ, vₖ, conj(sₖ), d̅) end # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᴴvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁ - norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁ + vₖᴴvₖ₊₁ = kdot(n, vₖ, q) / βₖ₊₁ + norm_vₖ₊₁ = knorm(n, q) / βₖ₊₁ # Compute BiLQ residual norm # ‖rₖ‖ = √(|μₖ|²‖vₖ‖² + |ωₖ|²‖vₖ₊₁‖² + μ̄ₖωₖ⟨vₖ,vₖ₊₁⟩ + μₖω̄ₖ⟨vₖ₊₁,vₖ⟩) @@ -353,41 +353,41 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi # w₁ = u₁ / δ̄₁ if iter == 2 wₖ₋₁ = wₖ₋₂ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) wₖ₋₁ .= uₖ₋₁ ./ conj(δₖ₋₁) end # w₂ = (u₂ - λ̄₁w₁) / δ̄₂ if iter == 3 wₖ₋₁ = wₖ₋₃ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) wₖ₋₁ .= wₖ₋₁ ./ conj(δₖ₋₁) end # wₖ₋₁ = (uₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ if iter ≥ 4 - @kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃) + kscal!(n, -conj(ϵₖ₋₃), wₖ₋₃) wₖ₋₁ = wₖ₋₃ - @kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) - @kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + kaxpy!(n, one(FC), uₖ₋₁, wₖ₋₁) + kaxpy!(n, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) wₖ₋₁ .= wₖ₋₁ ./ conj(δₖ₋₁) end if iter ≥ 3 # Swap pointers. - @kswap(wₖ₋₃, wₖ₋₂) + @kswap!(wₖ₋₃, wₖ₋₂) end if iter ≥ 2 # Compute solution tₖ₋₁. # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ - @kaxpy!(n, ψₖ₋₁, wₖ₋₁, t) + kaxpy!(n, ψₖ₋₁, wₖ₋₁, t) end # Update ψbarₖ₋₁ ψbarₖ₋₁ = ψbarₖ # Compute τₖ = τₖ₋₁ + ‖uₖ‖² - τₖ += @kdotr(n, uₖ, uₖ) + τₖ += kdotr(n, uₖ, uₖ) # Compute QMR residual norm ‖sₖ₋₁‖ ≤ |ψbarₖ| * √τₖ sNorm = abs(ψbarₖ) * √τₖ @@ -400,8 +400,8 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi end # Compute vₖ₊₁ and uₖ₊₁. - @kcopy!(n, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ - @kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ + kcopy!(n, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ + kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ if pᴴq ≠ zero(FC) vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @@ -436,7 +436,7 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi # Compute BICG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + kaxpy!(n, ζbarₖ, d̅, x) end # Termination status @@ -460,8 +460,8 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi overtimed && (status = "time limit exceeded") # Update x and y - warm_start && @kaxpy!(n, one(FC), Δx, x) - warm_start && @kaxpy!(n, one(FC), Δy, t) + warm_start && kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δy, t) solver.warm_start = false # Update stats diff --git a/src/block_gmres.jl b/src/block_gmres.jl index 5b9c14f31..28851cc08 100644 --- a/src/block_gmres.jl +++ b/src/block_gmres.jl @@ -243,7 +243,7 @@ kwargs_block_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rto for i = 1 : inner_iter-1 D1 .= R[nr+i] D2 .= R[nr+i+1] - @kormqr!('L', trans, H[i], τ[i], D) + kormqr!('L', trans, H[i], τ[i], D) R[nr+i] .= D1 R[nr+i+1] .= D2 end @@ -256,7 +256,7 @@ kwargs_block_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rto # Update Zₖ = (Qₖ)ᴴΓE₁ = (Λ₁, ..., Λₖ, Λbarₖ₊₁) D1 .= Z[inner_iter] D2 .= zero(FC) - @kormqr!('L', trans, H[inner_iter], τ[inner_iter], D) + kormqr!('L', trans, H[inner_iter], τ[inner_iter], D) Z[inner_iter] .= D1 # Update residual norm estimate. diff --git a/src/block_krylov_utils.jl b/src/block_krylov_utils.jl index 35c03aa35..0429a82bc 100644 --- a/src/block_krylov_utils.jl +++ b/src/block_krylov_utils.jl @@ -20,16 +20,16 @@ end function gs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, v::AbstractVector{FC}) where FC <: FloatOrComplex n, k = size(Q) aⱼ = v - @kfill!(R, zero(FC)) + kfill!(R, zero(FC)) for j = 1:k qⱼ = view(Q,:,j) aⱼ .= qⱼ for i = 1:j-1 qᵢ = view(Q,:,i) - R[i,j] = @kdot(n, qᵢ, aⱼ) # rᵢⱼ = ⟨qᵢ , aⱼ⟩ - @kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ + R[i,j] = kdot(n, qᵢ, aⱼ) # rᵢⱼ = ⟨qᵢ , aⱼ⟩ + kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ end - R[j,j] = @knrm2(n, qⱼ) # rⱼⱼ = ‖qⱼ‖ + R[j,j] = knorm(n, qⱼ) # rⱼⱼ = ‖qⱼ‖ qⱼ ./= R[j,j] # qⱼ = qⱼ / rⱼⱼ end return Q, R @@ -54,15 +54,15 @@ end function mgs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}) where FC <: FloatOrComplex n, k = size(Q) - @kfill!(R, zero(FC)) + kfill!(R, zero(FC)) for i = 1:k qᵢ = view(Q,:,i) - R[i,i] = @knrm2(n, qᵢ) # rᵢᵢ = ‖qᵢ‖ + R[i,i] = knorm(n, qᵢ) # rᵢᵢ = ‖qᵢ‖ qᵢ ./= R[i,i] # qᵢ = qᵢ / rᵢᵢ for j = i+1:k qⱼ = view(Q,:,j) - R[i,j] = @kdot(n, qᵢ, qⱼ) # rᵢⱼ = ⟨qᵢ , qⱼ⟩ - @kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ + R[i,j] = kdot(n, qᵢ, qⱼ) # rᵢⱼ = ⟨qᵢ , qⱼ⟩ + kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ end end return Q, R @@ -90,7 +90,7 @@ end function givens!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, C::AbstractVector{T}, S::AbstractVector{FC}) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} n, k = size(Q) - @kfill!(R, zero(FC)) + kfill!(R, zero(FC)) pos = 0 for j = 1:k for i = n-1:-1:j @@ -106,7 +106,7 @@ function givens!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, C::AbstractVector R[i,j] = Q[i,j] end end - @kfill!(Q, zero(FC)) + kfill!(Q, zero(FC)) for i = 1:k Q[i,i] = one(FC) end @@ -194,9 +194,9 @@ end function householder!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, τ::AbstractVector{FC}; compact::Bool=false) where FC <: FloatOrComplex n, k = size(Q) - @kfill!(R, zero(FC)) - @kgeqrf!(Q, τ) + kfill!(R, zero(FC)) + kgeqrf!(Q, τ) copy_triangle(Q, R, k) - !compact && @korgqr!(Q, τ) + !compact && korgqr!(Q, τ) return Q, R end diff --git a/src/car.jl b/src/car.jl index 464a6458c..37d02b03d 100644 --- a/src/car.jl +++ b/src/car.jl @@ -122,42 +122,42 @@ kwargs_car = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :ca rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) if warm_start mul!(r, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r) + kaxpby!(n, one(FC), b, -one(FC), r) else - @kcopy!(n, r, b) # r ← b + kcopy!(n, r, b) # r ← b end # p₀ = r₀ = M(b - Ax₀) if MisI - @kcopy!(n, p, r) # p ← r + kcopy!(n, p, r) # p ← r else mulorldiv!(p, M, r, ldiv) - @kcopy!(n, r, p) # r ← p + kcopy!(n, r, p) # r ← p end mul!(s, A, r) # s₀ = Ar₀ # q₀ = MAp₀ and s₀ = MAr₀ if MisI - @kcopy!(n, q, s) # q ← s + kcopy!(n, q, s) # q ← s else mulorldiv!(q, M, s, ldiv) - @kcopy!(n, s, q) # s ← q + kcopy!(n, s, q) # s ← q end - mul!(t, A, s) # t₀ = As₀ - @kcopy!(n, u, t) # u₀ = Aq₀ - ρ = @kdotr(n, t, s) # ρ₀ = ⟨t₀ , s₀⟩ + mul!(t, A, s) # t₀ = As₀ + kcopy!(n, u, t) # u₀ = Aq₀ + ρ = kdotr(n, t, s) # ρ₀ = ⟨t₀ , s₀⟩ # Compute ‖r₀‖ - rNorm = @knrm2(n, r) + rNorm = knorm(n, r) history && push!(rNorms, rNorm) # Compute ‖Ar₀‖ - ArNorm = MisI ? @knrm2(n, s) : sqrt(@kdotr(n, r, u)) + ArNorm = MisI ? knorm(n, s) : sqrt(kdotr(n, r, u)) history && push!(ArNorms, ArNorm) if rNorm == 0 @@ -185,13 +185,13 @@ kwargs_car = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :ca while !(solved || tired || user_requested_exit || overtimed) MisI || mulorldiv!(Mu, M, u, ldiv) - α = ρ / @kdotr(n, u, Mu) # αₖ = ρₖ / ⟨uₖ, Muₖ⟩ - @kaxpy!(n, α, p, x) # xₖ₊₁ = xₖ + αₖ * pₖ - @kaxpy!(n, -α, q, r) # rₖ₊₁ = rₖ - αₖ * qₖ - @kaxpy!(n, -α, Mu, s) # sₖ₊₁ = sₖ - αₖ * Muₖ + α = ρ / kdotr(n, u, Mu) # αₖ = ρₖ / ⟨uₖ, Muₖ⟩ + kaxpy!(n, α, p, x) # xₖ₊₁ = xₖ + αₖ * pₖ + kaxpy!(n, -α, q, r) # rₖ₊₁ = rₖ - αₖ * qₖ + kaxpy!(n, -α, Mu, s) # sₖ₊₁ = sₖ - αₖ * Muₖ # Compute ‖rₖ‖ - rNorm = @knrm2(n, r) + rNorm = knorm(n, r) history && push!(rNorms, rNorm) # Stopping conditions that do not depend on user input. @@ -201,16 +201,16 @@ kwargs_car = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :ca solved = resid_decrease_lim || resid_decrease_mach if !solved - mul!(t, A, s) # tₖ₊₁ = A * sₖ₊₁ - ρ_next = @kdotr(n, t, s) # ρₖ₊₁ = ⟨tₖ₊₁ , sₖ₊₁⟩ - β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ + mul!(t, A, s) # tₖ₊₁ = A * sₖ₊₁ + ρ_next = kdotr(n, t, s) # ρₖ₊₁ = ⟨tₖ₊₁ , sₖ₊₁⟩ + β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ ρ = ρ_next - @kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ * pₖ - @kaxpby!(n, one(FC), s, β, q) # qₖ₊₁ = sₖ₊₁ + βₖ * qₖ - @kaxpby!(n, one(FC), t, β, u) # uₖ₊₁ = tₖ₊₁ + βₖ * uₖ + kaxpby!(n, one(FC), r, β, p) # pₖ₊₁ = rₖ₊₁ + βₖ * pₖ + kaxpby!(n, one(FC), s, β, q) # qₖ₊₁ = sₖ₊₁ + βₖ * qₖ + kaxpby!(n, one(FC), t, β, u) # uₖ₊₁ = tₖ₊₁ + βₖ * uₖ # Compute ‖Arₖ‖ - ArNorm = MisI ? @knrm2(n, s) : sqrt(@kdotr(n, r, u)) + ArNorm = MisI ? knorm(n, s) : sqrt(kdotr(n, r, u)) history && push!(ArNorms, ArNorm) end @@ -231,7 +231,7 @@ kwargs_car = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :ca overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/cg.jl b/src/cg.jl index 769bb6c3c..f30dc40c9 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -133,16 +133,16 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v reset!(stats) z = MisI ? r : solver.z - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) if warm_start mul!(r, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r) + kaxpby!(n, one(FC), b, -one(FC), r) else - @kcopy!(n, r, b) # r ← b + kcopy!(n, r, b) # r ← b end MisI || mulorldiv!(z, M, r, ldiv) - @kcopy!(n, p, z) # p ← z - γ = @kdotr(n, r, z) + kcopy!(n, p, z) # p ← z + γ = kdotr(n, r, z) rNorm = sqrt(γ) history && push!(rNorms, rNorm) if γ == 0 @@ -175,14 +175,14 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v while !(solved || tired || zero_curvature || user_requested_exit || overtimed) mul!(Ap, A, p) - pAp = @kdotr(n, p, Ap) + pAp = kdotr(n, p, Ap) if (pAp ≤ eps(T) * pNorm²) && (radius == 0) if abs(pAp) ≤ eps(T) * pNorm² zero_curvature = true inconsistent = !linesearch end if linesearch - iter == 0 && @kcopy!(n, x, b) # x ← b + iter == 0 && kcopy!(n, x, b) # x ← b solved = true end end @@ -209,10 +209,10 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v on_boundary = true end - @kaxpy!(n, α, p, x) - @kaxpy!(n, -α, Ap, r) + kaxpy!(n, α, p, x) + kaxpy!(n, -α, Ap, r) MisI || mulorldiv!(z, M, r, ldiv) - γ_next = @kdotr(n, r, z) + γ_next = kdotr(n, r, z) rNorm = sqrt(γ_next) history && push!(rNorms, rNorm) @@ -228,7 +228,7 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v β = γ_next / γ pNorm² = γ_next + β^2 * pNorm² γ = γ_next - @kaxpby!(n, one(FC), z, β, p) + kaxpby!(n, one(FC), z, β, p) end iter = iter + 1 @@ -250,7 +250,7 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl index 0582561b9..636726c68 100644 --- a/src/cg_lanczos.jl +++ b/src/cg_lanczos.jl @@ -130,15 +130,15 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax v = MisI ? Mv : solver.v # Initial state. - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) if warm_start mul!(Mv, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), Mv) + kaxpby!(n, one(FC), b, -one(FC), Mv) else - @kcopy!(n, Mv, b) # Mv ← b + kcopy!(n, Mv, b) # Mv ← b end MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ + β = sqrt(kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ σ = β rNorm = σ history && push!(rNorms, rNorm) @@ -152,13 +152,13 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax solver.warm_start = false return solver end - @kcopy!(n, p, v) # p ← v + kcopy!(n, p, v) # p ← v # Initialize Lanczos process. # β₁Mv₁ = b - @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ - @kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv + kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + MisI || kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ + kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv iter = 0 itmax == 0 && (itmax = 2 * n) @@ -185,8 +185,8 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax while ! (solved || tired || (check_curvature & indefinite) || user_requested_exit || overtimed) # Form next Lanczos vector. # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ - mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ + mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ + δ = kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ # Check curvature. Exit fast if requested. # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ. @@ -194,25 +194,25 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax indefinite |= (γ ≤ 0) (check_curvature & indefinite) && continue - @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ + kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ if iter > 0 - @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ - @kcopy!(n, Mv_prev, Mv) # Mvₖ₋₁ ← Mvₖ + kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ + kcopy!(n, Mv_prev, Mv) # Mvₖ₋₁ ← Mvₖ end - @kcopy!(n, Mv, Mv_next) # Mvₖ ← Mvₖ₊₁ - MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ - @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ - Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂. + kcopy!(n, Mv, Mv_next) # Mvₖ ← Mvₖ₊₁ + MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ + β = sqrt(kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ + kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + MisI || kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ + Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂. β_prev = β # Compute next CG iterate. - @kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ + kaxpy!(n, γ, p, x) # xₖ₊₁ = xₖ + γₖ * pₖ ω = β * γ σ = -ω * σ # σₖ₊₁ = - βₖ₊₁ * γₖ * σₖ ω = ω * ω # ωₖ = (βₖ₊₁ * γₖ)² - @kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ + kaxpby!(n, σ, v, ω, p) # pₖ₊₁ = σₖ₊₁ * vₖ₊₁ + ωₖ * pₖ rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1 history && push!(rNorms, rNorm) iter = iter + 1 @@ -239,7 +239,7 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats. TODO: Estimate Acond. diff --git a/src/cg_lanczos_shift.jl b/src/cg_lanczos_shift.jl index 97adc860f..0e0056f95 100644 --- a/src/cg_lanczos_shift.jl +++ b/src/cg_lanczos_shift.jl @@ -129,12 +129,12 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t # Initial state. ## Distribute x similarly to shifts. for i = 1 : nshifts - @kfill!(x[i], zero(FC)) # x₀ + kfill!(x[i], zero(FC)) # x₀ end - @kcopy!(n, Mv, b) # Mv₁ ← b + kcopy!(n, Mv, b) # Mv₁ ← b MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ - @kfill!(rNorms, β) + β = sqrt(kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ + kfill!(rNorms, β) if history for i = 1 : nshifts push!(rNorms_history[i], rNorms[i]) @@ -142,7 +142,7 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t end # Keep track of shifted systems with negative curvature if required. - # We don't want to use @kfill! here because "indefinite" is a BitVector. + # We don't want to use kfill! here because "indefinite" is a BitVector. fill!(indefinite, false) if β == 0 @@ -155,21 +155,21 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t # Initialize each p to v. for i = 1 : nshifts - @kcopy!(n, p[i], v) # pᵢ ← v + kcopy!(n, p[i], v) # pᵢ ← v end # Initialize Lanczos process. # β₁Mv₁ = b - @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ - @kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv + kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + MisI || kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ + kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv # Initialize some constants used in recursions below. ρ = one(T) - @kfill!(σ, β) - @kfill!(δhat, zero(T)) - @kfill!(ω, zero(T)) - @kfill!(γ, one(T)) + kfill!(σ, β) + kfill!(δhat, zero(T)) + kfill!(ω, zero(T)) + kfill!(γ, one(T)) # Define stopping tolerance. ε = atol + rtol * β @@ -196,22 +196,22 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t while ! (solved || tired || user_requested_exit || overtimed) # Form next Lanczos vector. # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ - mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ - @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ + mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ + δ = kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ + kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ if iter > 0 - @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ - @kcopy!(n, Mv_prev, Mv) # Mvₖ₋₁ ← Mvₖ + kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ + kcopy!(n, Mv_prev, Mv) # Mvₖ₋₁ ← Mvₖ end - @kcopy!(n, Mv, Mv_next) # Mvₖ ← Mvₖ₊₁ - MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ - @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ - MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ + kcopy!(n, Mv, Mv_next) # Mvₖ ← Mvₖ₊₁ + MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ + β = sqrt(kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ + kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + MisI || kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ # Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖². # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ. - MisI || (ρ = @kdotr(n, v, v)) + MisI || (ρ = kdotr(n, v, v)) for i = 1 : nshifts δhat[i] = δ + ρ * shifts[i] γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) @@ -225,11 +225,11 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t for i = 1 : nshifts not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] if not_cv[i] - @kaxpy!(n, γ[i], p[i], x[i]) + kaxpy!(n, γ[i], p[i], x[i]) ω[i] = β * γ[i] σ[i] *= -ω[i] ω[i] *= ω[i] - @kaxpby!(n, σ[i], v, ω[i], p[i]) + kaxpby!(n, σ[i], v, ω[i], p[i]) # Update list of systems that have not converged. rNorms[i] = abs(σ[i]) diff --git a/src/cgls.jl b/src/cgls.jl index ad8992d17..76c9d7ade 100644 --- a/src/cgls.jl +++ b/src/cgls.jl @@ -146,9 +146,9 @@ kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose Mr = MisI ? r : solver.Mr Mq = MisI ? q : solver.Mr - @kfill!(x, zero(FC)) - @kcopy!(m, r, b) # r ← b - bNorm = @knrm2(m, r) # Marginally faster than norm(b) + kfill!(x, zero(FC)) + kcopy!(m, r, b) # r ← b + bNorm = knorm(m, r) # Marginally faster than norm(b) if bNorm == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -160,8 +160,8 @@ kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose end MisI || mulorldiv!(Mr, M, r, ldiv) mul!(s, Aᴴ, Mr) - @kcopy!(n, p, s) # p ← s - γ = @kdotr(n, s, s) # γ = sᴴs + kcopy!(n, p, s) # p ← s + γ = kdotr(n, s, s) # γ = sᴴs iter = 0 itmax == 0 && (itmax = m + n) @@ -183,8 +183,8 @@ kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose while ! (solved || tired || user_requested_exit || overtimed) mul!(q, A, p) MisI || mulorldiv!(Mq, M, q, ldiv) - δ = @kdotr(m, q, Mq) # δ = qᴴMq - λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᴴp + δ = kdotr(m, q, Mq) # δ = qᴴMq + λ > 0 && (δ += λ * kdotr(n, p, p)) # δ = δ + pᴴp α = γ / δ # if a trust-region constraint is give, compute step to the boundary @@ -194,16 +194,16 @@ kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose on_boundary = true end - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + kaxpy!(n, α, p, x) # Faster than x = x + α * p + kaxpy!(m, -α, q, r) # Faster than r = r - α * q MisI || mulorldiv!(Mr, M, r, ldiv) mul!(s, Aᴴ, Mr) - λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x - γ_next = @kdotr(n, s, s) # γ_next = sᴴs + λ > 0 && kaxpy!(n, -λ, x, s) # s = A' * r - λ * x + γ_next = kdotr(n, s, s) # γ_next = sᴴs β = γ_next / γ - @kaxpby!(n, one(FC), s, β, p) # p = s + βp + kaxpby!(n, one(FC), s, β, p) # p = s + βp γ = γ_next - rNorm = @knrm2(m, r) # Marginally faster than norm(r) + rNorm = knorm(m, r) # Marginally faster than norm(r) ArNorm = sqrt(γ) history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) diff --git a/src/cgls_lanczos_shift.jl b/src/cgls_lanczos_shift.jl index f3e751b74..14caba17d 100644 --- a/src/cgls_lanczos_shift.jl +++ b/src/cgls_lanczos_shift.jl @@ -134,14 +134,14 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose # Initial state. ## Distribute x similarly to shifts. for i = 1 : nshifts - @kfill!(x[i], zero(FC)) # x₀ + kfill!(x[i], zero(FC)) # x₀ end - @kcopy!(m, u, b) # u ← b - @kfill!(u_prev, zero(FC)) - mul!(v, Aᴴ, u) # v₁ ← Aᴴ * b - β = sqrt(@kdotr(n, v, v)) # β₁ = v₁ᵀ M v₁ - @kfill!(rNorms, β) + kcopy!(m, u, b) # u ← b + kfill!(u_prev, zero(FC)) + mul!(v, Aᴴ, u) # v₁ ← Aᴴ * b + β = sqrt(kdotr(n, v, v)) # β₁ = v₁ᵀ M v₁ + kfill!(rNorms, β) if history for i = 1 : nshifts push!(rNorms_history[i], rNorms[i]) @@ -158,20 +158,20 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose # Initialize each p to v. for i = 1 : nshifts - @kcopy!(n, p[i], v) # pᵢ ← v + kcopy!(n, p[i], v) # pᵢ ← v end # Initialize Lanczos process. # β₁v₁ = b - @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ - @kscal!(m, one(FC) / β, u) + kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + kscal!(m, one(FC) / β, u) # Initialize some constants used in recursions below. ρ = one(T) - @kfill!(σ, β) - @kfill!(δhat, zero(T)) - @kfill!(ω, zero(T)) - @kfill!(γ, one(T)) + kfill!(σ, β) + kfill!(δhat, zero(T)) + kfill!(ω, zero(T)) + kfill!(γ, one(T)) # Define stopping tolerance. ε = atol + rtol * β @@ -198,18 +198,18 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose while ! (solved || tired || user_requested_exit || overtimed) # Form next Lanczos vector. - mul!(utilde, A, v) # utildeₖ ← Avₖ - δ = @kdotr(m, utilde, utilde) # δₖ = vₖᵀAᴴAvₖ - @kaxpy!(m, -δ, u, utilde) # uₖ₊₁ = utildeₖ - δₖuₖ - βₖuₖ₋₁ - @kaxpy!(m, -β, u_prev, utilde) - mul!(v, Aᴴ, utilde) # vₖ₊₁ = Aᴴuₖ₊₁ - β = sqrt(@kdotr(n, v, v)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ - @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ - @kscal!(m, one(FC) / β, utilde) # uₖ₊₁ = uₖ₊₁ / βₖ₊₁ - @kcopy!(m, u_prev, u) # u_prev ← u - @kcopy!(m, u, utilde) # u ← utilde - - MisI || (ρ = @kdotr(n, v, v)) + mul!(utilde, A, v) # utildeₖ ← Avₖ + δ = kdotr(m, utilde, utilde) # δₖ = vₖᵀAᴴAvₖ + kaxpy!(m, -δ, u, utilde) # uₖ₊₁ = utildeₖ - δₖuₖ - βₖuₖ₋₁ + kaxpy!(m, -β, u_prev, utilde) + mul!(v, Aᴴ, utilde) # vₖ₊₁ = Aᴴuₖ₊₁ + β = sqrt(kdotr(n, v, v)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ + kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + kscal!(m, one(FC) / β, utilde) # uₖ₊₁ = uₖ₊₁ / βₖ₊₁ + kcopy!(m, u_prev, u) # u_prev ← u + kcopy!(m, u, utilde) # u ← utilde + + MisI || (ρ = kdotr(n, v, v)) for i = 1 : nshifts δhat[i] = δ + ρ * shifts[i] γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) @@ -219,11 +219,11 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose for i = 1 : nshifts not_cv[i] = !converged[i] if not_cv[i] - @kaxpy!(n, γ[i], p[i], x[i]) + kaxpy!(n, γ[i], p[i], x[i]) ω[i] = β * γ[i] σ[i] *= -ω[i] ω[i] *= ω[i] - @kaxpby!(n, σ[i], v, ω[i], p[i]) + kaxpby!(n, σ[i], v, ω[i], p[i]) # Update list of systems that have not converged. rNorms[i] = abs(σ[i]) diff --git a/src/cgne.jl b/src/cgne.jl index 906f51c01..357691c5f 100644 --- a/src/cgne.jl +++ b/src/cgne.jl @@ -151,10 +151,10 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor reset!(stats) z = NisI ? r : solver.z - @kfill!(x, zero(FC)) - @kcopy!(m, r, b) # r ← b + kfill!(x, zero(FC)) + kcopy!(m, r, b) # r ← b NisI || mulorldiv!(z, N, r, ldiv) - rNorm = @knrm2(m, r) # Marginally faster than norm(r) + rNorm = knorm(m, r) # Marginally faster than norm(r) history && push!(rNorms, rNorm) if rNorm == 0 stats.niter = 0 @@ -163,7 +163,7 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor stats.status = "x = 0 is a zero-residual solution" return solver end - λ > 0 && @kcopy!(m, s, r) # s ← r + λ > 0 && kcopy!(m, s, r) # s ← r mul!(p, Aᴴ, z) # Use ‖p‖ to detect inconsistent system. @@ -171,9 +171,9 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor # Because CGNE is equivalent to CG applied to AAᴴy = b, there will be a # conjugate direction u such that uᴴAAᴴu = 0, i.e., Aᴴu = 0. In this # implementation, p is a substitute for Aᴴu. - pNorm = @knrm2(n, p) + pNorm = knorm(n, p) - γ = @kdotr(m, r, z) # Faster than γ = dot(r, z) + γ = kdotr(m, r, z) # Faster than γ = dot(r, z) iter = 0 itmax == 0 && (itmax = m + n) @@ -191,20 +191,20 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor while ! (solved || inconsistent || tired || user_requested_exit || overtimed) mul!(q, A, p) - λ > 0 && @kaxpy!(m, λ, s, q) - δ = @kdotr(n, p, p) # Faster than dot(p, p) - λ > 0 && (δ += λ * @kdotr(m, s, s)) + λ > 0 && kaxpy!(m, λ, s, q) + δ = kdotr(n, p, p) # Faster than dot(p, p) + λ > 0 && (δ += λ * kdotr(m, s, s)) α = γ / δ - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, q, r) # Faster than r = r - α * q + kaxpy!(n, α, p, x) # Faster than x = x + α * p + kaxpy!(m, -α, q, r) # Faster than r = r - α * q NisI || mulorldiv!(z, N, r, ldiv) - γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z) + γ_next = kdotr(m, r, z) # Faster than γ_next = dot(r, z) β = γ_next / γ mul!(Aᴴz, Aᴴ, z) - @kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p - pNorm = @knrm2(n, p) + kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p + pNorm = knorm(n, p) if λ > 0 - @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + kaxpby!(m, one(FC), r, β, s) # s = r + β * s end γ = γ_next rNorm = sqrt(γ_next) diff --git a/src/cgs.jl b/src/cgs.jl index fbe03ceeb..e93d46928 100644 --- a/src/cgs.jl +++ b/src/cgs.jl @@ -151,16 +151,16 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) else - @kcopy!(n, r₀, b) # r₀ ← b + kcopy!(n, r₀, b) # r₀ ← b end - @kfill!(x, zero(FC)) # x₀ + kfill!(x, zero(FC)) # x₀ MisI || mulorldiv!(r, M, r₀, ldiv) # r₀ # Compute residual norm ‖r₀‖₂. - rNorm = @knrm2(n, r) + rNorm = knorm(n, r) history && push!(rNorms, rNorm) if rNorm == 0 stats.niter = 0 @@ -172,7 +172,7 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist end # Compute ρ₀ = ⟨ r̅₀,r₀ ⟩ - ρ = @kdot(n, c, r) + ρ = kdot(n, c, r) if ρ == 0 stats.niter = 0 stats.solved, stats.inconsistent = false, false @@ -189,9 +189,9 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time)) - @kcopy!(n, u, r) # u₀ - @kcopy!(n, p, r) # p₀ - @kfill!(q, zero(FC)) # q₋₁ + kcopy!(n, u, r) # u₀ + kcopy!(n, p, r) # p₀ + kfill!(q, zero(FC)) # q₋₁ # Stopping criterion. solved = rNorm ≤ ε @@ -206,22 +206,22 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist NisI || mulorldiv!(y, N, p, ldiv) # yₖ = N⁻¹pₖ mul!(t, A, y) # tₖ = Ayₖ MisI || mulorldiv!(v, M, t, ldiv) # vₖ = M⁻¹tₖ - σ = @kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩ + σ = kdot(n, c, v) # σₖ = ⟨ r̅₀,M⁻¹AN⁻¹pₖ ⟩ α = ρ / σ # αₖ = ρₖ / σₖ - @kcopy!(n, q, u) # qₖ = uₖ - @kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ - @kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ + kcopy!(n, q, u) # qₖ = uₖ + kaxpy!(n, -α, v, q) # qₖ = qₖ - αₖ * M⁻¹AN⁻¹pₖ + kaxpy!(n, one(FC), q, u) # uₖ₊½ = uₖ + qₖ NisI || mulorldiv!(z, N, u, ldiv) # zₖ = N⁻¹uₖ₊½ - @kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ) + kaxpy!(n, α, z, x) # xₖ₊₁ = xₖ + αₖ * N⁻¹(uₖ + qₖ) mul!(s, A, z) # sₖ = Azₖ MisI || mulorldiv!(w, M, s, ldiv) # wₖ = M⁻¹sₖ - @kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ) - ρ_next = @kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩ + kaxpy!(n, -α, w, r) # rₖ₊₁ = rₖ - αₖ * M⁻¹AN⁻¹(uₖ + qₖ) + ρ_next = kdot(n, c, r) # ρₖ₊₁ = ⟨ r̅₀,rₖ₊₁ ⟩ β = ρ_next / ρ # βₖ = ρₖ₊₁ / ρₖ - @kcopy!(n, u, r) # uₖ₊₁ = rₖ₊₁ - @kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ - @kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ - @kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ + kcopy!(n, u, r) # uₖ₊₁ = rₖ₊₁ + kaxpy!(n, β, q, u) # uₖ₊₁ = uₖ₊₁ + βₖ * qₖ + kaxpby!(n, one(FC), q, β, p) # pₐᵤₓ = qₖ + βₖ * pₖ + kaxpby!(n, one(FC), u, β, p) # pₖ₊₁ = uₖ₊₁ + βₖ * pₐᵤₓ # Update ρ. ρ = ρ_next # ρₖ ← ρₖ₊₁ @@ -230,7 +230,7 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist iter = iter + 1 # Compute residual norm ‖rₖ‖₂. - rNorm = @knrm2(n, r) + rNorm = knorm(n, r) history && push!(rNorms, rNorm) # Stopping conditions that do not depend on user input. @@ -257,7 +257,7 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/cr.jl b/src/cr.jl index 14dc46f44..0c1ef450b 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -141,19 +141,19 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema Mq = MisI ? q : solver.Mq # Initial state. - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) if warm_start mul!(p, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), p) + kaxpby!(n, one(FC), b, -one(FC), p) else - @kcopy!(n, p, b) # p ← b + kcopy!(n, p, b) # p ← b end - MisI && @kcopy!(n, r, p) # r ← p + MisI && kcopy!(n, r, p) # r ← p MisI || mulorldiv!(r, M, p, ldiv) mul!(Ar, A, r) - ρ = @kdotr(n, r, Ar) + ρ = kdotr(n, r, Ar) - rNorm = sqrt(@kdotr(n, r, p)) # ‖r‖ + rNorm = sqrt(kdotr(n, r, p)) # ‖r‖ history && push!(rNorms, rNorm) # Values of ‖r‖ if ρ == 0 @@ -165,9 +165,9 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema solver.warm_start = false return solver end - @kcopy!(n, p, r) # p ← r - @kcopy!(n, q, Ar) # q ← Ar - (verbose > 0) && (m = zero(T)) # quadratic model + kcopy!(n, p, r) # p ← r + kcopy!(n, q, Ar) # q ← Ar + (verbose > 0) && (m = zero(T)) # quadratic model iter = 0 itmax == 0 && (itmax = 2 * n) @@ -180,7 +180,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema pAp = ρ abspAp = abs(pAp) xNorm = zero(T) - ArNorm = @knrm2(n, Ar) # ‖Ar‖ + ArNorm = knorm(n, Ar) # ‖Ar‖ history && push!(ArNorms, ArNorm) ε = atol + rtol * rNorm (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %5s\n", "k", "‖x‖", "‖r‖", "quad", "timer") @@ -221,12 +221,12 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema tr = maximum(to_boundary(n, x, r, Mq, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²)) (verbose > 0) && @printf(iostream, "t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr) - if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᴴAp ≃ 0 - npcurv = true # nonpositive curvature + if abspAp ≤ γ * pNorm * knorm(n, q) # pᴴAp ≃ 0 + npcurv = true # nonpositive curvature (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp) - if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0 + if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0 (verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr) - p = r # - ∇q(x) + p = r # - ∇q(x) q = Ar # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᴴAr # 1) if rᴴAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᴴAr @@ -245,7 +245,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema α = descent ? t1 : t2 ρ > 0 && (tr = min(tr, rNorm² / ρ)) Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᴴAp = 0 - if Δ > 0 # direction r engenders a better decrease + if Δ > 0 # direction r engenders a better decrease (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) (verbose > 0) && @printf(iostream, "redefining p := r\n") p = r @@ -256,9 +256,9 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema end end - elseif pAp > 0 && ρ > 0 # no negative curvature + elseif pAp > 0 && ρ > 0 # no negative curvature (verbose > 0) && @printf(iostream, "positive curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) - α = ρ / @kdotr(n, q, Mq) + α = ρ / kdotr(n, q, Mq) if α ≥ t1 α = t1 on_boundary = true @@ -313,15 +313,15 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema end elseif radius == 0 - α = ρ / @kdotr(n, q, Mq) # step + α = ρ / kdotr(n, q, Mq) # step end - @kaxpy!(n, α, p, x) - xNorm = @knrm2(n, x) + kaxpy!(n, α, p, x) + xNorm = knorm(n, x) xNorm ≈ radius && (on_boundary = true) - @kaxpy!(n, -α, Mq, r) # residual + kaxpy!(n, -α, Mq, r) # residual if MisI - rNorm² = @kdotr(n, r, r) + rNorm² = kdotr(n, r, r) rNorm = sqrt(rNorm²) else ω = sqrt(α) * sqrt(ρ) @@ -330,7 +330,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema end history && push!(rNorms, rNorm) mul!(Ar, A, r) - ArNorm = @knrm2(n, Ar) + ArNorm = knorm(n, Ar) history && push!(ArNorms, ArNorm) iter = iter + 1 @@ -353,10 +353,10 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema (solved || tired || user_requested_exit || overtimed) && continue ρbar = ρ - ρ = @kdotr(n, r, Ar) + ρ = kdotr(n, r, Ar) β = ρ / ρbar # step for the direction computation - @kaxpby!(n, one(FC), r, β, p) - @kaxpby!(n, one(FC), Ar, β, q) + kaxpby!(n, one(FC), r, β, p) + kaxpby!(n, one(FC), Ar, β, q) pNorm² = rNorm² + 2 * β * pr - 2 * β * α * pAp + β^2 * pNorm² if pNorm² > sqrt(eps(T)) @@ -372,9 +372,9 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema solver.warm_start = false return solver end - pr = rNorm² + β * pr - β * α * pAp # pᴴr + pr = rNorm² + β * pr - β * α * pAp # pᴴr abspr = abs(pr) - pAp = ρ + β^2 * pAp # pᴴq + pAp = ρ + β^2 * pAp # pᴴq abspAp = abs(pAp) descent = pr > 0 @@ -390,7 +390,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/craig.jl b/src/craig.jl index 1311ad35e..de865f1c5 100644 --- a/src/craig.jl +++ b/src/craig.jl @@ -199,12 +199,12 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at u = MisI ? Mu : solver.u v = NisI ? Nv : solver.v - @kfill!(x, zero(FC)) - @kfill!(y, zero(FC)) + kfill!(x, zero(FC)) + kfill!(y, zero(FC)) - @kcopy!(m, Mu, b) # Mu ← b + kcopy!(m, Mu, b) # Mu ← b MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) + β₁ = sqrt(kdotr(m, u, Mu)) rNorm = β₁ history && push!(rNorms, rNorm) if β₁ == 0 @@ -223,13 +223,13 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at # Initialize Golub-Kahan process. # β₁Mu₁ = b. - @kscal!(m, one(FC) / β₁, u) - MisI || @kscal!(m, one(FC) / β₁, Mu) + kscal!(m, one(FC) / β₁, u) + MisI || kscal!(m, one(FC) / β₁, Mu) - @kfill!(Nv, zero(FC)) - @kfill!(w, zero(FC)) # Used to update y. + kfill!(Nv, zero(FC)) + kfill!(w, zero(FC)) # Used to update y. - λ > 0 && @kfill!(w2, zero(FC)) + λ > 0 && kfill!(w2, zero(FC)) Anorm² = zero(T) # Estimate of ‖A‖²_F. Anorm = zero(T) @@ -268,15 +268,15 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at # Generate the next Golub-Kahan vectors # 1. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ mul!(Aᴴu, Aᴴ, u) - @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) + α = sqrt(kdotr(n, v, Nv)) if α == 0 inconsistent = true continue end - @kscal!(n, one(FC) / α, v) - NisI || @kscal!(n, one(FC) / α, Nv) + kscal!(n, one(FC) / α, v) + NisI || kscal!(n, one(FC) / α, Nv) Anorm² += α * α + λ * λ @@ -296,27 +296,27 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at # w1 = c₁ * v + s₁ * w2 # w2 = s₁ * v - c₁ * w2 # x = x + ξ * w1 - @kaxpy!(n, ξ * c₁, v, x) - @kaxpy!(n, ξ * s₁, w2, x) - @kaxpby!(n, s₁, v, -c₁, w2) + kaxpy!(n, ξ * c₁, v, x) + kaxpy!(n, ξ * s₁, w2, x) + kaxpby!(n, s₁, v, -c₁, w2) else - @kaxpy!(n, ξ, v, x) # x = x + ξ * v + kaxpy!(n, ξ, v, x) # x = x + ξ * v end # Recur y. - @kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w - @kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w + kaxpby!(m, one(FC), u, -θ/ρ_prev, w) # w = u - θ/ρ_prev * w + kaxpy!(m, ξ/ρ, w, y) # y = y + ξ/ρ * w - Dnorm² += @knrm2(m, w) + Dnorm² += knorm(m, w) # 2. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + kaxpby!(m, one(FC), Av, -α, Mu) MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) + β = sqrt(kdotr(m, u, Mu)) if β ≠ 0 - @kscal!(m, one(FC) / β, u) - MisI || @kscal!(m, one(FC) / β, Mu) + kscal!(m, one(FC) / β, u) + MisI || kscal!(m, one(FC) / β, Mu) end # Finish updates from the first Givens rotation. @@ -333,7 +333,7 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at # k+1 [ γ λ ] [ -c₂ s₂ ] = [ 0 δ ] # k+2 [ 0 0 ] [ s₂ c₂ ] [ 0 0 ] c₂, s₂, δ = sym_givens(λ, γ) - @kscal!(n, s₂, w2) + kscal!(n, s₂, w2) end Anorm² += β * β @@ -373,7 +373,7 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at # transfer to LSQR point if requested if λ > 0 && transfer_to_lsqr ξ *= -θ / δ - @kaxpy!(n, ξ, w2, x) + kaxpy!(n, ξ, w2, x) # TODO: update y end diff --git a/src/craigmr.jl b/src/craigmr.jl index 341ec4420..0e87622b7 100644 --- a/src/craigmr.jl +++ b/src/craigmr.jl @@ -187,11 +187,11 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver v = NisI ? Nv : solver.v # Compute y such that AAᴴy = b. Then recover x = Aᴴy. - @kfill!(x, zero(FC)) - @kfill!(y, zero(FC)) - @kcopy!(m, Mu, b) # Mu ← b + kfill!(x, zero(FC)) + kfill!(y, zero(FC)) + kcopy!(m, Mu, b) # Mu ← b MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) + β = sqrt(kdotr(m, u, Mu)) if β == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -204,13 +204,13 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver # Initialize Golub-Kahan process. # β₁Mu₁ = b. - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) + kscal!(m, one(FC)/β, u) + MisI || kscal!(m, one(FC)/β, Mu) # α₁Nv₁ = Aᴴu₁. mul!(Aᴴu, Aᴴ, u) - @kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu + kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) + α = sqrt(kdotr(n, v, Nv)) Anorm² = α * α iter = 0 @@ -229,18 +229,18 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver stats.status = "x = 0 is a minimum least-squares solution" return solver end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) # Regularization. - λₖ = λ # λ₁ = λ - cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ - cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁ - λ > 0 && @kcopy!(n, q, v) # Additional vector needed to update x, by definition q₀ = 0 + λₖ = λ # λ₁ = λ + cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ + cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁ + λ > 0 && kcopy!(n, q, v) # Additional vector needed to update x, by definition q₀ = 0 if λ > 0 (cpₖ, spₖ, αhat) = sym_givens(α, λₖ) - @kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁ + kscal!(n, spₖ, q) # q̄₁ = sp₁ * v₁ else αhat = α end @@ -254,13 +254,13 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver ArNorm = α history && push!(ArNorms, ArNorm) - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - @kcopy!(m, wbar, u) - @kscal!(m, one(FC)/αhat, wbar) - @kfill!(w, zero(FC)) - @kfill!(d, zero(FC)) + kcopy!(m, wbar, u) + kscal!(m, one(FC)/αhat, wbar) + kfill!(w, zero(FC)) + kfill!(d, zero(FC)) status = "unknown" solved = rNorm ≤ ɛ_c @@ -275,12 +275,12 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver # Generate next Golub-Kahan vectors. # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + kaxpby!(m, one(FC), Av, -α, Mu) MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) + β = sqrt(kdotr(m, u, Mu)) if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) + kscal!(m, one(FC)/β, u) + MisI || kscal!(m, one(FC)/β, Mu) end Anorm² = Anorm² + β * β # = ‖B_{k-1}‖² @@ -311,35 +311,35 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver rNorm = abs(ζbar) history && push!(rNorms, rNorm) - @kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ - @kaxpy!(m, ζ, w, y) # y = y + ζ * w + kaxpby!(m, one(FC)/ρ, wbar, -θ/ρ, w) # w = (wbar - θ * w) / ρ + kaxpy!(m, ζ, w, y) # y = y + ζ * w if λ > 0 # DₖRₖ = V̅ₖ with v̅ₖ = cpₖvₖ + spₖqₖ₋₁ if iter == 1 - @kaxpy!(n, one(FC)/ρ, cpₖ * v, d) + kaxpy!(n, one(FC)/ρ, cpₖ * v, d) else - @kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d) - @kaxpy!(n, one(FC)/ρ, spₖ * q, d) - @kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ + kaxpby!(n, one(FC)/ρ, cpₖ * v, -θ/ρ, d) + kaxpy!(n, one(FC)/ρ, spₖ * q, d) + kaxpby!(n, spₖ, v, -cpₖ, q) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ end else # DₖRₖ = Vₖ if iter == 1 - @kaxpy!(n, one(FC)/ρ, v, d) + kaxpy!(n, one(FC)/ρ, v, d) else - @kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d) + kaxpby!(n, one(FC)/ρ, v, -θ/ρ, d) end end # xₖ = Dₖzₖ - @kaxpy!(n, ζ, d, x) + kaxpy!(n, ζ, d, x) # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ mul!(Aᴴu, Aᴴ, u) - @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) + α = sqrt(kdotr(n, v, Nv)) Anorm² = Anorm² + α * α # = ‖Lₖ‖ ArNorm = α * β * abs(ζ/ρ) history && push!(ArNorms, ArNorm) @@ -348,16 +348,16 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver if λ > 0 (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ) - @kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ + kscal!(n, sdₖ, q) # qₖ ← sdₖ * q̄ₖ (cpₖ, spₖ, αhat) = sym_givens(α, λₖ₊₁) else αhat = α end if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - @kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) + kaxpby!(m, one(T)/αhat, u, -βhat / αhat, wbar) # wbar = (u - beta * wbar) / alpha end θ = s * αhat ρbar = -c * αhat diff --git a/src/crls.jl b/src/crls.jl index 2e4f4f177..b9c39db53 100644 --- a/src/crls.jl +++ b/src/crls.jl @@ -139,10 +139,10 @@ kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose Mr = MisI ? r : solver.Ms MAp = MisI ? Ap : solver.Ms - @kfill!(x, zero(FC)) - @kcopy!(m, r, b) # r ← b - bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. - rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. + kfill!(x, zero(FC)) + kcopy!(m, r, b) # r ← b + bNorm = knorm(m, r) # norm(b - A * x0) if x0 ≠ 0. + rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. history && push!(rNorms, rNorm) if bNorm == 0 stats.niter = 0 @@ -158,15 +158,15 @@ kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose mul!(s, A, Ar) MisI || mulorldiv!(Ms, M, s, ldiv) - @kcopy!(n, p, Ar) # p ← Ar - @kcopy!(m, Ap, s) # Ap ← s - mul!(q, Aᴴ, Ms) # Ap - λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p - γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms) + kcopy!(n, p, Ar) # p ← Ar + kcopy!(m, Ap, s) # Ap ← s + mul!(q, Aᴴ, Ms) # Ap + λ > 0 && kaxpy!(n, λ, p, q) # q = q + λ * p + γ = kdotr(m, s, Ms) # Faster than γ = dot(s, Ms) iter = 0 itmax == 0 && (itmax = m + n) - ArNorm = @knrm2(n, Ar) # Marginally faster than norm(Ar) + ArNorm = knorm(n, Ar) # Marginally faster than norm(Ar) λ > 0 && (γ += λ * ArNorm * ArNorm) history && push!(ArNorms, ArNorm) ε = atol + rtol * ArNorm @@ -182,14 +182,14 @@ kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose overtimed = false while ! (solved || tired || user_requested_exit || overtimed) - qNorm² = @kdotr(n, q, q) # dot(q, q) + qNorm² = kdotr(n, q, q) # dot(q, q) α = γ / qNorm² # if a trust-region constraint is give, compute step to the boundary # (note that α > 0 in CRLS) if radius > 0 - pNorm = @knrm2(n, p) - if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p + pNorm = knorm(n, p) + if kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p psd = true # det(AᴴA) = 0 p = Ar # p = Aᴴr pNorm² = ArNorm * ArNorm @@ -205,29 +205,29 @@ kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose end end - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q - ArNorm = @knrm2(n, Ar) + kaxpy!(n, α, p, x) # Faster than x = x + α * p + kaxpy!(n, -α, q, Ar) # Faster than Ar = Ar - α * q + ArNorm = knorm(n, Ar) solved = psd || on_boundary solved && continue - @kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap + kaxpy!(m, -α, Ap, r) # Faster than r = r - α * Ap mul!(s, A, Ar) MisI || mulorldiv!(Ms, M, s, ldiv) - γ_next = @kdotr(m, s, Ms) # Faster than γ_next = dot(s, s) + γ_next = kdotr(m, s, Ms) # Faster than γ_next = dot(s, s) λ > 0 && (γ_next += λ * ArNorm * ArNorm) β = γ_next / γ - @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p - @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap + kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p + kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap MisI || mulorldiv!(MAp, M, Ap, ldiv) mul!(q, Aᴴ, MAp) - λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p + λ > 0 && kaxpy!(n, λ, p, q) # q = q + λ * p γ = γ_next if λ > 0 - rNorm = sqrt(@kdotr(m, r, r) + λ * @kdotr(n, x, x)) + rNorm = sqrt(kdotr(m, r, r) + λ * kdotr(n, x, x)) else - rNorm = @knrm2(m, r) # norm(r) + rNorm = knorm(m, r) end history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) diff --git a/src/crmr.jl b/src/crmr.jl index 204f595f5..73dad4d1a 100644 --- a/src/crmr.jl +++ b/src/crmr.jl @@ -150,9 +150,9 @@ kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor reset!(stats) Nq = NisI ? q : solver.Nq - @kfill!(x, zero(FC)) # initial estimation x = 0 + kfill!(x, zero(FC)) # initial estimation x = 0 mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b - bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. + bNorm = knorm(m, r) # norm(b - A * x0) if x0 ≠ 0. rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. history && push!(rNorms, rNorm) if bNorm == 0 @@ -163,17 +163,17 @@ kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor history && push!(ArNorms, zero(T)) return solver end - λ > 0 && @kcopy!(m, s, r) # s ← r - mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0. - @kcopy!(n, p, Aᴴr) # p ← Aᴴr - γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr) + λ > 0 && kcopy!(m, s, r) # s ← r + mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0. + kcopy!(n, p, Aᴴr) # p ← Aᴴr + γ = kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr) λ > 0 && (γ += λ * rNorm * rNorm) iter = 0 itmax == 0 && (itmax = m + n) ArNorm = sqrt(γ) history && push!(ArNorms, ArNorm) - ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. + ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. (verbose > 0) && @printf(iostream, "%5s %8s %8s %5s\n", "k", "‖Aᴴr‖", "‖r‖", "timer") kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %.2fs\n", iter, ArNorm, rNorm, ktimer(start_time)) @@ -187,20 +187,20 @@ kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor while ! (solved || inconsistent || tired || user_requested_exit || overtimed) mul!(q, A, p) - λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s + λ > 0 && kaxpy!(m, λ, s, q) # q = q + λ * s NisI || mulorldiv!(Nq, N, q, ldiv) - α = γ / @kdotr(m, q, Nq) # Compute qᴴ * N * q - @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq - rNorm = @knrm2(m, r) # norm(r) + α = γ / kdotr(m, q, Nq) # Compute qᴴ * N * q + kaxpy!(n, α, p, x) # Faster than x = x + α * p + kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq + rNorm = knorm(m, r) # norm(r) mul!(Aᴴr, Aᴴ, r) - γ_next = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr) + γ_next = kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr) λ > 0 && (γ_next += λ * rNorm * rNorm) β = γ_next / γ - @kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p + kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p if λ > 0 - @kaxpby!(m, one(FC), r, β, s) # s = r + β * s + kaxpby!(m, one(FC), r, β, s) # s = r + β * s end γ = γ_next diff --git a/src/diom.jl b/src/diom.jl index e491f64e5..2c3d2331f 100644 --- a/src/diom.jl +++ b/src/diom.jl @@ -141,15 +141,15 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem r₀ = MisI ? t : solver.w # Initial solution x₀ and residual r₀. - @kfill!(x, zero(FC)) # x₀ + kfill!(x, zero(FC)) # x₀ if warm_start mul!(t, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), t) + kaxpby!(n, one(FC), b, -one(FC), t) else - @kcopy!(n, t, b) # t ← b + kcopy!(n, t, b) # t ← b end MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀) - rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ + rNorm = knorm(n, r₀) # β = ‖r₀‖₂ history && push!(rNorms, rNorm) if rNorm == 0 stats.niter = 0 @@ -169,17 +169,17 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem mem = length(V) # Memory for i = 1 : mem - @kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). + kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). end for i = 1 : mem-1 - @kfill!(P[i], zero(FC)) # Directions Pₖ = NVₖ(Uₖ)⁻¹. + kfill!(P[i], zero(FC)) # Directions Pₖ = NVₖ(Uₖ)⁻¹. end - @kfill!(H, zero(FC)) # Last column of the band hessenberg matrix Hₖ = LₖUₖ. + kfill!(H, zero(FC)) # Last column of the band hessenberg matrix Hₖ = LₖUₖ. # Each column has at most mem + 1 nonzero elements. # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H. # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located. # In addition of that, the last column of Uₖ is stored in H. - @kfill!(L, zero(FC)) # Last mem-1 pivots of Lₖ. + kfill!(L, zero(FC)) # Last mem-1 pivots of Lₖ. # Initial ξ₁ and V₁. ξ = rNorm @@ -209,8 +209,8 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem for i = max(1, iter-mem+1) : iter ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. diag = iter - i + 1 - H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ - @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ + H[diag] = kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ + kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ end # Partial reorthogonalization of the Krylov basis. @@ -218,14 +218,14 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem for i = max(1, iter-mem+1) : iter ipos = mod(i-1, mem) + 1 diag = iter - i + 1 - Htmp = @kdot(n, w, V[ipos]) + Htmp = kdot(n, w, V[ipos]) H[diag] += Htmp - @kaxpy!(n, -Htmp, V[ipos], w) + kaxpy!(n, -Htmp, V[ipos], w) end end # Compute hₖ₊₁.ₖ and vₖ₊₁. - Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + Haux = knorm(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown" V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ end @@ -262,20 +262,20 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem diag = iter - i + 1 if ipos == ppos # pₖ ← -uₖ₋ₘₑₘ₊₁.ₖ * pₖ₋ₘₑₘ₊₁ - @kscal!(n, -H[diag], P[ppos]) + kscal!(n, -H[diag], P[ppos]) else # pₖ ← pₖ - uᵢ.ₖ * pᵢ - @kaxpy!(n, -H[diag], P[ipos], P[ppos]) + kaxpy!(n, -H[diag], P[ipos], P[ppos]) end end # pₐᵤₓ ← pₐᵤₓ + Nvₖ - @kaxpy!(n, one(FC), z, P[ppos]) + kaxpy!(n, one(FC), z, P[ppos]) # pₖ = pₐᵤₓ / uₖ.ₖ P[ppos] .= P[ppos] ./ H[1] # Update solution xₖ. # xₖ = xₖ₋₁ + ξₖ * pₖ - @kaxpy!(n, ξ, P[ppos], x) + kaxpy!(n, ξ, P[ppos], x) # Compute residual norm. # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ| @@ -304,7 +304,7 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/dqgmres.jl b/src/dqgmres.jl index 53fa3ac76..06b78d5e9 100644 --- a/src/dqgmres.jl +++ b/src/dqgmres.jl @@ -141,15 +141,15 @@ kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :ti r₀ = MisI ? t : solver.w # Initial solution x₀ and residual r₀. - @kfill!(x, zero(FC)) # x₀ + kfill!(x, zero(FC)) # x₀ if warm_start mul!(t, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), t) + kaxpby!(n, one(FC), b, -one(FC), t) else - @kcopy!(n, t, b) # t ← b + kcopy!(n, t, b) # t ← b end MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀) - rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ + rNorm = knorm(n, r₀) # β = ‖r₀‖₂ history && push!(rNorms, rNorm) if rNorm == 0 stats.niter = 0 @@ -170,12 +170,12 @@ kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :ti # Set up workspace. mem = length(V) # Memory. for i = 1 : mem - @kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). - @kfill!(P[i], zero(FC)) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹. + kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). + kfill!(P[i], zero(FC)) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹. end - @kfill!(c, zero(T)) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ. - @kfill!(s, zero(FC)) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ. - @kfill!(H, zero(FC)) # Last column of the band hessenberg matrix Hₖ. + kfill!(c, zero(T)) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ. + kfill!(s, zero(FC)) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ. + kfill!(H, zero(FC)) # Last column of the band hessenberg matrix Hₖ. # Each column has at most mem + 1 nonzero elements. # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H. # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located. @@ -211,8 +211,8 @@ kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :ti for i = max(1, iter-mem+1) : iter ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V. diag = iter - i + 1 - H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ - @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ + H[diag] = kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩ + kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ end # Partial reorthogonalization of the Krylov basis. @@ -220,14 +220,14 @@ kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :ti for i = max(1, iter-mem+1) : iter ipos = mod(i-1, mem) + 1 diag = iter - i + 1 - Htmp = @kdot(n, w, V[ipos]) + Htmp = kdot(n, w, V[ipos]) H[diag] += Htmp - @kaxpy!(n, -Htmp, V[ipos], w) + kaxpy!(n, -Htmp, V[ipos], w) end end # Compute hₖ₊₁.ₖ and vₖ₊₁. - Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + Haux = knorm(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown" V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ end @@ -261,20 +261,20 @@ kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :ti diag = iter - i + 1 if ipos == pos # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ - @kscal!(n, -H[diag], P[pos]) + kscal!(n, -H[diag], P[pos]) else # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ - @kaxpy!(n, -H[diag], P[ipos], P[pos]) + kaxpy!(n, -H[diag], P[ipos], P[pos]) end end # pₐᵤₓ ← pₐᵤₓ + Nvₖ - @kaxpy!(n, one(FC), z, P[pos]) + kaxpy!(n, one(FC), z, P[pos]) # pₖ = pₐᵤₓ / hₖ.ₖ P[pos] .= P[pos] ./ H[1] # Compute solution xₖ. # xₖ ← xₖ₋₁ + γₖ * pₖ - @kaxpy!(n, γₖ, P[pos], x) + kaxpy!(n, γₖ, P[pos], x) # Update residual norm estimate. # ‖ M(b - Axₖ) ‖₂ ≈ |γₖ₊₁| @@ -306,7 +306,7 @@ kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :ti overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/fgmres.jl b/src/fgmres.jl index 4482897a9..6b5a3a23b 100644 --- a/src/fgmres.jl +++ b/src/fgmres.jl @@ -144,18 +144,18 @@ kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :i xr = restart ? Δx : x # Initial solution x₀. - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) # Initial residual r₀. if warm_start mul!(w, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), w) - restart && @kaxpy!(n, one(FC), Δx, x) + kaxpby!(n, one(FC), b, -one(FC), w) + restart && kaxpy!(n, one(FC), Δx, x) else - @kcopy!(n, w, b) # w ← b + kcopy!(n, w, b) # w ← b end MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) - β = @knrm2(n, r₀) # β = ‖r₀‖₂ + β = knorm(n, r₀) # β = ‖r₀‖₂ rNorm = β history && push!(rNorms, β) @@ -200,25 +200,25 @@ kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :i # Initialize workspace. nr = 0 # Number of coefficients stored in Rₖ. for i = 1 : mem - @kfill!(V[i], zero(FC)) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}. - @kfill!(Z[i], zero(FC)) # Zₖ = [N₁v₁, ..., Nₖvₖ] + kfill!(V[i], zero(FC)) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}. + kfill!(Z[i], zero(FC)) # Zₖ = [N₁v₁, ..., Nₖvₖ] end - @kfill!(s, zero(FC)) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - @kfill!(c, zero(T)) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - @kfill!(R, zero(FC)) # Upper triangular matrix Rₖ. - @kfill!(z, zero(FC)) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. + kfill!(s, zero(FC)) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + kfill!(c, zero(T)) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + kfill!(R, zero(FC)) # Upper triangular matrix Rₖ. + kfill!(z, zero(FC)) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. if restart - @kfill!(xr, zero(FC)) # xr === Δx when restart is set to true + kfill!(xr, zero(FC)) # xr === Δx when restart is set to true if npass ≥ 1 mul!(w, A, x) - @kaxpby!(n, one(FC), b, -one(FC), w) + kaxpby!(n, one(FC), b, -one(FC), w) MisI || mulorldiv!(r₀, M, w, ldiv) end end # Initial ζ₁ and V₁ - β = @knrm2(n, r₀) + β = knorm(n, r₀) z[1] = β V[1] .= r₀ ./ rNorm @@ -248,21 +248,21 @@ kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :i mul!(w, A, Z[inner_iter]) # w ← Azₖ MisI || mulorldiv!(q, M, w, ldiv) # q ← MAzₖ for i = 1 : inner_iter - R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq - @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ + R[nr+i] = kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end # Reorthogonalization of the basis. if reorthogonalization for i = 1 : inner_iter - Htmp = @kdot(n, V[i], q) + Htmp = kdot(n, V[i], q) R[nr+i] += Htmp - @kaxpy!(n, -Htmp, V[i], q) + kaxpy!(n, -Htmp, V[i], q) end end # Compute hₖ₊₁.ₖ - Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + Hbis = knorm(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ # Update the QR factorization of Hₖ₊₁.ₖ. # Apply previous Givens reflections Ωᵢ. @@ -335,9 +335,9 @@ kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :i # Form xₖ = N₁v₁y₁ + ... + Nₖvₖyₖ = z₁y₁ + ... + zₖyₖ for i = 1 : inner_iter - @kaxpy!(n, y[i], Z[i], xr) + kaxpy!(n, y[i], Z[i], xr) end - restart && @kaxpy!(n, one(FC), xr, x) + restart && kaxpy!(n, one(FC), xr, x) # Update inner_itmax, iter and tired variables. inner_itmax = inner_itmax - inner_iter @@ -356,7 +356,7 @@ kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :i overtimed && (status = "time limit exceeded") # Update x - warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + warm_start && !restart && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/fom.jl b/src/fom.jl index ee6874a09..05450862d 100644 --- a/src/fom.jl +++ b/src/fom.jl @@ -139,18 +139,18 @@ kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itma xr = restart ? Δx : x # Initial solution x₀. - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) # Initial residual r₀. if warm_start mul!(w, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), w) - restart && @kaxpy!(n, one(FC), Δx, x) + kaxpby!(n, one(FC), b, -one(FC), w) + restart && kaxpy!(n, one(FC), Δx, x) else - @kcopy!(n, w, b) # w ← b + kcopy!(n, w, b) # w ← b end MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) - β = @knrm2(n, r₀) # β = ‖r₀‖₂ + β = knorm(n, r₀) # β = ‖r₀‖₂ rNorm = β history && push!(rNorms, β) @@ -194,23 +194,23 @@ kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itma # Initialize workspace. nr = 0 # Number of coefficients stored in Uₖ. for i = 1 : mem - @kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). + kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). end - @kfill!(l, zero(FC)) # Lower unit triangular matrix Lₖ. - @kfill!(U, zero(FC)) # Upper triangular matrix Uₖ. - @kfill!(z, zero(FC)) # Solution of Lₖzₖ = βe₁. + kfill!(l, zero(FC)) # Lower unit triangular matrix Lₖ. + kfill!(U, zero(FC)) # Upper triangular matrix Uₖ. + kfill!(z, zero(FC)) # Solution of Lₖzₖ = βe₁. if restart - @kfill!(xr, zero(FC)) # xr === Δx when restart is set to true + kfill!(xr, zero(FC)) # xr === Δx when restart is set to true if npass ≥ 1 mul!(w, A, x) - @kaxpby!(n, one(FC), b, -one(FC), w) + kaxpby!(n, one(FC), b, -one(FC), w) MisI || mulorldiv!(r₀, M, w, ldiv) end end # Initial ζ₁ and V₁ - β = @knrm2(n, r₀) + β = knorm(n, r₀) z[1] = β V[1] .= r₀ ./ rNorm @@ -238,21 +238,21 @@ kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itma mul!(w, A, p) # w ← ANvₖ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq - @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ + U[nr+i] = kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end # Reorthogonalization of the Krylov basis. if reorthogonalization for i = 1 : inner_iter - Htmp = @kdot(n, V[i], q) + Htmp = kdot(n, V[i], q) U[nr+i] += Htmp - @kaxpy!(n, -Htmp, V[i], q) + kaxpy!(n, -Htmp, V[i], q) end end # Compute hₖ₊₁.ₖ - Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + Hbis = knorm(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ # Update the LU factorization of Hₖ. if inner_iter ≥ 2 @@ -311,13 +311,13 @@ kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itma # Form xₖ = NVₖyₖ for i = 1 : inner_iter - @kaxpy!(n, y[i], V[i], xr) + kaxpy!(n, y[i], V[i], xr) end if !NisI - @kcopy!(n, solver.p, xr) # p ← xr + kcopy!(n, solver.p, xr) # p ← xr mulorldiv!(xr, N, solver.p, ldiv) end - restart && @kaxpy!(n, one(FC), xr, x) + restart && kaxpy!(n, one(FC), xr, x) # Update inner_itmax, iter, tired and overtimed variables. inner_itmax = inner_itmax - inner_iter @@ -336,7 +336,7 @@ kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itma overtimed && (status = "time limit exceeded") # Update x - warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + warm_start && !restart && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/gmres.jl b/src/gmres.jl index 4c8d14d45..ce07382e8 100644 --- a/src/gmres.jl +++ b/src/gmres.jl @@ -139,18 +139,18 @@ kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :it xr = restart ? Δx : x # Initial solution x₀. - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) # Initial residual r₀. if warm_start mul!(w, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), w) - restart && @kaxpy!(n, one(FC), Δx, x) + kaxpby!(n, one(FC), b, -one(FC), w) + restart && kaxpy!(n, one(FC), Δx, x) else - @kcopy!(n, w, b) # w ← b + kcopy!(n, w, b) # w ← b end MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) - β = @knrm2(n, r₀) # β = ‖r₀‖₂ + β = knorm(n, r₀) # β = ‖r₀‖₂ rNorm = β history && push!(rNorms, β) @@ -195,24 +195,24 @@ kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :it # Initialize workspace. nr = 0 # Number of coefficients stored in Rₖ. for i = 1 : mem - @kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). + kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀). end - @kfill!(s, zero(FC)) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - @kfill!(c, zero(T)) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. - @kfill!(R, zero(FC)) # Upper triangular matrix Rₖ. - @kfill!(z, zero(FC)) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. + kfill!(s, zero(FC)) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + kfill!(c, zero(T)) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + kfill!(R, zero(FC)) # Upper triangular matrix Rₖ. + kfill!(z, zero(FC)) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. if restart - @kfill!(xr, zero(FC)) # xr === Δx when restart is set to true + kfill!(xr, zero(FC)) # xr === Δx when restart is set to true if npass ≥ 1 mul!(w, A, x) - @kaxpby!(n, one(FC), b, -one(FC), w) + kaxpby!(n, one(FC), b, -one(FC), w) MisI || mulorldiv!(r₀, M, w, ldiv) end end # Initial ζ₁ and V₁ - β = @knrm2(n, r₀) + β = knorm(n, r₀) z[1] = β V[1] .= r₀ ./ rNorm @@ -241,21 +241,21 @@ kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :it mul!(w, A, p) # w ← ANvₖ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq - @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ + R[nr+i] = kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end # Reorthogonalization of the Krylov basis. if reorthogonalization for i = 1 : inner_iter - Htmp = @kdot(n, V[i], q) + Htmp = kdot(n, V[i], q) R[nr+i] += Htmp - @kaxpy!(n, -Htmp, V[i], q) + kaxpy!(n, -Htmp, V[i], q) end end # Compute hₖ₊₁.ₖ - Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + Hbis = knorm(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ # Update the QR factorization of Hₖ₊₁.ₖ. # Apply previous Givens reflections Ωᵢ. @@ -328,13 +328,13 @@ kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :it # Form xₖ = NVₖyₖ for i = 1 : inner_iter - @kaxpy!(n, y[i], V[i], xr) + kaxpy!(n, y[i], V[i], xr) end if !NisI - @kcopy!(n, solver.p, xr) # p ← xr + kcopy!(n, solver.p, xr) # p ← xr mulorldiv!(xr, N, solver.p, ldiv) end - restart && @kaxpy!(n, one(FC), xr, x) + restart && kaxpy!(n, one(FC), xr, x) # Update inner_itmax, iter, tired and overtimed variables. inner_itmax = inner_itmax - inner_iter @@ -353,7 +353,7 @@ kwargs_gmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :it overtimed && (status = "time limit exceeded") # Update x - warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + warm_start && !restart && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/gpmr.jl b/src/gpmr.jl index f0db40825..53d13c33a 100644 --- a/src/gpmr.jl +++ b/src/gpmr.jl @@ -198,8 +198,8 @@ kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :ato p = DisI ? dB : solver.p # Initial solutions x₀ and y₀. - @kfill!(x, zero(FC)) - @kfill!(y, zero(FC)) + kfill!(x, zero(FC)) + kfill!(y, zero(FC)) iter = 0 itmax == 0 && (itmax = m+n) @@ -209,41 +209,41 @@ kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :ato mem = length(V) # Memory ωₖ = zero(FC) # Auxiliary variable to store fₖₖ for i = 1 : mem - @kfill!(V[i], zero(FC)) - @kfill!(U[i], zero(FC)) + kfill!(V[i], zero(FC)) + kfill!(U[i], zero(FC)) end - @kfill!(gs, zero(FC)) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. - @kfill!(gc, zero(T)) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. - @kfill!(R , zero(FC)) # Upper triangular matrix Rₖ. - @kfill!(zt, zero(FC)) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂). + kfill!(gs, zero(FC)) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. + kfill!(gc, zero(T)) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. + kfill!(R , zero(FC)) # Upper triangular matrix Rₖ. + kfill!(zt, zero(FC)) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂). # Warm-start # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ # E ≠ Iₘ is only allowed when λ = 0 because E⁻¹Δx can't be computed to use CME = Iₘ # Compute C(b - AΔy) - λΔx warm_start && mul!(b₀, A, Δy) - warm_start && @kaxpby!(m, one(FC), b, -one(FC), b₀) + warm_start && kaxpby!(m, one(FC), b, -one(FC), b₀) !CisI && mulorldiv!(q, C, b₀, ldiv) !CisI && (b₀ = q) - warm_start && (λ ≠ 0) && @kaxpy!(m, -λ, Δx, b₀) + warm_start && (λ ≠ 0) && kaxpy!(m, -λ, Δx, b₀) # If μ ≠ 0, Dc₀ = Dc - DBΔx - μΔy because DN = Iₙ and F = Iₙ # F ≠ Iₙ is only allowed when μ = 0 because F⁻¹Δy can't be computed to use DNF = Iₘ # Compute D(c - BΔx) - μΔy warm_start && mul!(c₀, B, Δx) - warm_start && @kaxpby!(n, one(FC), c, -one(FC), c₀) + warm_start && kaxpby!(n, one(FC), c, -one(FC), c₀) !DisI && mulorldiv!(p, D, c₀, ldiv) !DisI && (c₀ = p) - warm_start && (μ ≠ 0) && @kaxpy!(n, -μ, Δy, c₀) + warm_start && (μ ≠ 0) && kaxpy!(n, -μ, Δy, c₀) # Initialize the orthogonal Hessenberg reduction process. # βv₁ = Cb - β = @knrm2(m, b₀) + β = knorm(m, b₀) β ≠ 0 || error("b must be nonzero") V[1] .= b₀ ./ β # γu₁ = Dc - γ = @knrm2(n, c₀) + γ = knorm(n, c₀) γ ≠ 0 || error("c must be nonzero") U[1] .= c₀ ./ γ @@ -303,10 +303,10 @@ kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :ato DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ for i = 1 : iter - hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq - fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp - @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ - @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ + hᵢₖ = kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq + fᵢₖ = kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp + kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ + kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ R[nr₂ₖ + 2i-1] = hᵢₖ (i < iter) ? R[nr₂ₖ₋₁ + 2i] = fᵢₖ : ωₖ = fᵢₖ end @@ -314,17 +314,17 @@ kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :ato # Reorthogonalization of the Krylov basis. if reorthogonalization for i = 1 : iter - Htmp = @kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq - Ftmp = @kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp - @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ - @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ + Htmp = kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq + Ftmp = kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp + kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ + kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ (i < iter) ? R[nr₂ₖ₋₁ + 2i] += Ftmp : ωₖ += Ftmp # fᵢ.ₖ = fᵢ.ₖ + fₜₘₚ end end - Haux = @knrm2(m, q) # hₖ₊₁.ₖ = ‖q‖₂ - Faux = @knrm2(n, p) # fₖ₊₁.ₖ = ‖p‖₂ + Haux = knorm(m, q) # hₖ₊₁.ₖ = ‖q‖₂ + Faux = knorm(n, p) # fₖ₊₁.ₖ = ‖p‖₂ # Add regularization terms. R[nr₂ₖ₋₁ + 2k-1] = λ # S₂ₖ₋₁.₂ₖ₋₁ = λ @@ -461,7 +461,7 @@ kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :ato V[k+1] .= q ./ Haux # hₖ₊₁.ₖvₖ₊₁ = q else # Breakdown -- hₖ₊₁.ₖ = ‖q‖₂ = 0 and Auₖ ∈ Span{v₁, ..., vₖ} - @kfill!(V[k+1], zero(FC)) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ} + kfill!(V[k+1], zero(FC)) # vₖ₊₁ = 0 such that vₖ₊₁ ⊥ Span{v₁, ..., vₖ} end # fₖ₊₁.ₖ ≠ 0 @@ -469,7 +469,7 @@ kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :ato U[k+1] .= p ./ Faux # fₖ₊₁.ₖuₖ₊₁ = p else # Breakdown -- fₖ₊₁.ₖ = ‖p‖₂ = 0 and Bvₖ ∈ Span{u₁, ..., uₖ} - @kfill!(U[k+1], zero(FC)) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ} + kfill!(U[k+1], zero(FC)) # uₖ₊₁ = 0 such that uₖ₊₁ ⊥ Span{u₁, ..., uₖ} end zt[2k+1] = τbar₂ₖ₊₁ @@ -496,19 +496,19 @@ kwargs_gpmr = (:C, :D, :E, :F, :ldiv, :gsp, :λ, :μ, :reorthogonalization, :ato # Compute xₖ and yₖ for i = 1 : iter - @kaxpy!(m, zt[2i-1], V[i], x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ - @kaxpy!(n, zt[2i] , U[i], y) # xₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ + kaxpy!(m, zt[2i-1], V[i], x) # xₖ = ζ₁v₁ + ζ₃v₂ + ••• + ζ₂ₖ₋₁vₖ + kaxpy!(n, zt[2i] , U[i], y) # xₖ = ζ₂u₁ + ζ₄u₂ + ••• + ζ₂ₖuₖ end if !EisI - @kcopy!(m, wB, x) # wB ← x + kcopy!(m, wB, x) # wB ← x mulorldiv!(x, E, wB, ldiv) end if !FisI - @kcopy!(n, wA, y) # wA ← y + kcopy!(n, wA, y) # wA ← y mulorldiv!(y, F, wA, ldiv) end - warm_start && @kaxpy!(m, one(FC), Δx, x) - warm_start && @kaxpy!(n, one(FC), Δy, y) + warm_start && kaxpy!(m, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δy, y) solver.warm_start = false # Termination status diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl index 1541d0ab9..98fb57b84 100644 --- a/src/krylov_processes.jl +++ b/src/krylov_processes.jl @@ -52,7 +52,7 @@ function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOr vᵢ = view(V,:,i) vᵢ₊₁ = q = view(V,:,i+1) if i == 1 - β₁ = @knrm2(n, b) + β₁ = knorm(n, b) vᵢ .= b ./ β₁ end mul!(q, A, vᵢ) @@ -60,12 +60,12 @@ function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOr vᵢ₋₁ = view(V,:,i-1) βᵢ = nzval[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ nzval[pαᵢ-1] = βᵢ # Tᵢ₋₁.ᵢ = βᵢ - @kaxpy!(n, -βᵢ, vᵢ₋₁, q) + kaxpy!(n, -βᵢ, vᵢ₋₁, q) end - αᵢ = @kdotr(n, vᵢ, q) + αᵢ = kdotr(n, vᵢ, q) nzval[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ - @kaxpy!(n, -αᵢ, vᵢ, q) - βᵢ₊₁ = @knrm2(n, q) + kaxpy!(n, -αᵢ, vᵢ, q) + βᵢ₊₁ = knorm(n, q) nzval[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ vᵢ₊₁ .= q ./ βᵢ₊₁ pαᵢ = pαᵢ + 3 @@ -135,7 +135,7 @@ function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k vᵢ₊₁ = q = view(V,:,i+1) uᵢ₊₁ = p = view(U,:,i+1) if i == 1 - cᴴb = @kdot(n, c, b) + cᴴb = kdot(n, c, b) β₁ = √(abs(cᴴb)) γ₁ᴴ = conj(cᴴb / β₁) vᵢ .= b ./ β₁ @@ -148,15 +148,15 @@ function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k uᵢ₋₁ = view(U,:,i-1) βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ - @kaxpy!(n, - γᵢ , vᵢ₋₁, q) - @kaxpy!(n, -conj(βᵢ), uᵢ₋₁, p) + kaxpy!(n, - γᵢ , vᵢ₋₁, q) + kaxpy!(n, -conj(βᵢ), uᵢ₋₁, p) end - αᵢ = @kdot(n, uᵢ, q) + αᵢ = kdot(n, uᵢ, q) nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ - @kaxpy!(m, - αᵢ , vᵢ, q) - @kaxpy!(n, -conj(αᵢ), uᵢ, p) - pᴴq = @kdot(n, p, q) + kaxpy!(m, - αᵢ , vᵢ, q) + kaxpy!(n, -conj(αᵢ), uᵢ, p) + pᴴq = kdot(n, p, q) βᵢ₊₁ = √(abs(pᴴq)) γᵢ₊₁ = pᴴq / βᵢ₊₁ vᵢ₊₁ .= q ./ βᵢ₊₁ @@ -209,24 +209,24 @@ function arnoldi(A, b::AbstractVector{FC}, k::Int; reorthogonalization::Bool=fal vⱼ = view(V,:,j) vⱼ₊₁ = q = view(V,:,j+1) if j == 1 - β = @knrm2(n, b) + β = knorm(n, b) vⱼ .= b ./ β end mul!(q, A, vⱼ) for i = 1:j vᵢ = view(V,:,i) - H[i,j] = @kdot(n, vᵢ, q) - @kaxpy!(n, -H[i,j], vᵢ, q) + H[i,j] = kdot(n, vᵢ, q) + kaxpy!(n, -H[i,j], vᵢ, q) end if reorthogonalization for i = 1:j vᵢ = view(V,:,i) - Htmp = @kdot(n, vᵢ, q) - @kaxpy!(n, -Htmp, vᵢ, q) + Htmp = kdot(n, vᵢ, q) + kaxpy!(n, -Htmp, vᵢ, q) H[i,j] += Htmp end end - H[j+1,j] = @knrm2(n, q) + H[j+1,j] = knorm(n, q) vⱼ₊₁ .= q ./ H[j+1,j] end return V, β, H @@ -290,21 +290,21 @@ function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComple vᵢ₊₁ = p = view(V,:,i+1) if i == 1 wᵢ = vᵢ - β₁ = @knrm2(m, b) + β₁ = knorm(m, b) uᵢ .= b ./ β₁ mul!(wᵢ, Aᴴ, uᵢ) - αᵢ = @knrm2(n, wᵢ) + αᵢ = knorm(n, wᵢ) nzval[pαᵢ] = αᵢ # Lᵢ.ᵢ = αᵢ vᵢ .= wᵢ ./ αᵢ end mul!(q, A, vᵢ) αᵢ = nzval[pαᵢ] # αᵢ = Lᵢ.ᵢ - @kaxpy!(m, -αᵢ, uᵢ, q) - βᵢ₊₁ = @knrm2(m, q) + kaxpy!(m, -αᵢ, uᵢ, q) + βᵢ₊₁ = knorm(m, q) uᵢ₊₁ .= q ./ βᵢ₊₁ mul!(p, Aᴴ, uᵢ₊₁) - @kaxpy!(n, -βᵢ₊₁, vᵢ, p) - αᵢ₊₁ = @knrm2(n, p) + kaxpy!(n, -βᵢ₊₁, vᵢ, p) + αᵢ₊₁ = knorm(n, p) vᵢ₊₁ .= p ./ αᵢ₊₁ nzval[pαᵢ+1] = βᵢ₊₁ # Lᵢ₊₁.ᵢ = βᵢ₊₁ nzval[pαᵢ+2] = αᵢ₊₁ # Lᵢ₊₁.ᵢ₊₁ = αᵢ₊₁ @@ -375,8 +375,8 @@ function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k:: vᵢ₊₁ = q = view(V,:,i+1) uᵢ₊₁ = p = view(U,:,i+1) if i == 1 - β₁ = @knrm2(m, b) - γ₁ᴴ = @knrm2(n, c) + β₁ = knorm(m, b) + γ₁ᴴ = knorm(n, c) vᵢ .= b ./ β₁ uᵢ .= c ./ γ₁ᴴ end @@ -387,16 +387,16 @@ function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k:: uᵢ₋₁ = view(U,:,i-1) βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ - @kaxpy!(m, -γᵢ, vᵢ₋₁, q) - @kaxpy!(n, -βᵢ, uᵢ₋₁, p) + kaxpy!(m, -γᵢ, vᵢ₋₁, q) + kaxpy!(n, -βᵢ, uᵢ₋₁, p) end - αᵢ = @kdot(m, vᵢ, q) + αᵢ = kdot(m, vᵢ, q) nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ - @kaxpy!(m, - αᵢ , vᵢ, q) - @kaxpy!(n, -conj(αᵢ), uᵢ, p) - βᵢ₊₁ = @knrm2(m, q) - γᵢ₊₁ = @knrm2(n, p) + kaxpy!(m, - αᵢ , vᵢ, q) + kaxpy!(n, -conj(αᵢ), uᵢ, p) + βᵢ₊₁ = knorm(m, q) + γᵢ₊₁ = knorm(n, p) vᵢ₊₁ .= q ./ βᵢ₊₁ uᵢ₊₁ .= p ./ γᵢ₊₁ nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ @@ -456,8 +456,8 @@ function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k:: vⱼ₊₁ = q = view(V,:,j+1) uⱼ₊₁ = p = view(U,:,j+1) if j == 1 - β = @knrm2(m, b) - γ = @knrm2(n, c) + β = knorm(m, b) + γ = knorm(n, c) vⱼ .= b ./ β uⱼ .= c ./ γ end @@ -466,26 +466,26 @@ function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k:: for i = 1:j vᵢ = view(V,:,i) uᵢ = view(U,:,i) - H[i,j] = @kdot(m, vᵢ, q) - @kaxpy!(n, -H[i,j], vᵢ, q) - F[i,j] = @kdot(n, uᵢ, p) - @kaxpy!(m, -F[i,j], uᵢ, p) + H[i,j] = kdot(m, vᵢ, q) + kaxpy!(n, -H[i,j], vᵢ, q) + F[i,j] = kdot(n, uᵢ, p) + kaxpy!(m, -F[i,j], uᵢ, p) end if reorthogonalization for i = 1:j vᵢ = view(V,:,i) uᵢ = view(U,:,i) - Htmp = @kdot(m, vᵢ, q) - @kaxpy!(m, -Htmp, vᵢ, q) + Htmp = kdot(m, vᵢ, q) + kaxpy!(m, -Htmp, vᵢ, q) H[i,j] += Htmp - Ftmp = @kdot(n, uᵢ, p) - @kaxpy!(n, -Ftmp, uᵢ, p) + Ftmp = kdot(n, uᵢ, p) + kaxpy!(n, -Ftmp, uᵢ, p) F[i,j] += Ftmp end end - H[j+1,j] = @knrm2(m, q) + H[j+1,j] = knorm(m, q) vⱼ₊₁ .= q ./ H[j+1,j] - F[j+1,j] = @knrm2(n, p) + F[j+1,j] = knorm(n, p) uⱼ₊₁ .= p ./ F[j+1,j] end return V, β, H, U, γ, F diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl index f19de8ce4..14c0e430c 100644 --- a/src/krylov_utils.jl +++ b/src/krylov_utils.jl @@ -305,32 +305,32 @@ ktimer(start_time::UInt64) = (time_ns() - start_time) / 1e9 mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x) -kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy) -kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy) -kdot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = dot(x, y) +kdot(n :: Integer, x :: Vector{T}, y :: Vector{T}) where T <: BLAS.BlasReal = BLAS.dot(n, x, 1, y, 1) +kdot(n :: Integer, x :: Vector{T}, y :: Vector{T}) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, 1, y, 1) +kdot(n :: Integer, x :: AbstractVector{T}, y :: AbstractVector{T}) where T <: FloatOrComplex = dot(x, y) -kdotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = kdot(n, x, dx, y, dy) -kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(kdot(n, x, dx, y, dy)) +kdotr(n :: Integer, x :: AbstractVector{T}, y :: AbstractVector{T}) where T <: AbstractFloat = kdot(n, x, y) +kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, y :: AbstractVector{Complex{T}}) where T <: AbstractFloat = real(kdot(n, x, y)) -knrm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx) -knrm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = norm(x) +knorm(n :: Integer, x :: Vector{T}) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, 1) +knorm(n :: Integer, x :: AbstractVector{T}) where T <: FloatOrComplex = norm(x) -kscal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx) -kscal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = (x .*= s) -kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x, dx) +kscal!(n :: Integer, s :: T, x :: Vector{T}) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, 1) +kscal!(n :: Integer, s :: T, x :: AbstractVector{T}) where T <: FloatOrComplex = (x .*= s) +kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x) -kaxpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy) -kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpy!(s, x, y) -kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, dx, y, dy) +kaxpy!(n :: Integer, s :: T, x :: Vector{T}, y :: Vector{T}) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, 1, y, 1) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, y :: AbstractVector{T}) where T <: FloatOrComplex = axpy!(s, x, y) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, y :: AbstractVector{Complex{T}}) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, y) -kaxpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy) -kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpby!(s, x, t, y) -kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, t, y, dy) -kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, s, x, dx, Complex{T}(t), y, dy) -kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy) +kaxpby!(n :: Integer, s :: T, x :: Vector{T}, t :: T, y :: Vector{T}) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, 1, t, y, 1) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, t :: T, y :: AbstractVector{T}) where T <: FloatOrComplex = axpby!(s, x, t, y) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, t :: Complex{T}, y :: AbstractVector{Complex{T}}) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, t, y) +kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, t :: T, y :: AbstractVector{Complex{T}}) where T <: AbstractFloat = kaxpby!(n, s, x, Complex{T}(t), y) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, t :: T, y :: AbstractVector{Complex{T}}) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, Complex{T}(t), y) -kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) -kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x) +kcopy!(n :: Integer, y :: Vector{T}, x :: Vector{T}) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, 1, y, 1) +kcopy!(n :: Integer, y :: AbstractVector{T}, x :: AbstractVector{T}) where T <: FloatOrComplex = copyto!(y, x) kfill!(x :: AbstractArray{T}, val :: T) where T <: FloatOrComplex = fill!(x, val) @@ -338,52 +338,9 @@ kgeqrf!(A :: AbstractMatrix{T}, tau :: AbstractVector{T}) where T <: BLAS.BlasFl korgqr!(A :: AbstractMatrix{T}, tau :: AbstractVector{T}) where T <: BLAS.BlasFloat = LAPACK.orgqr!(A, tau) kormqr!(side :: Char, trans :: Char, A :: AbstractMatrix{T}, tau :: AbstractVector{T}, C :: AbstractMatrix{T}) where T <: BLAS.BlasFloat = LAPACK.ormqr!(side, trans, A, tau, C) -macro kgeqrf!(A, tau) - return esc(:(Krylov.kgeqrf!($A, $tau))) -end - -macro korgqr!(A, tau) - return esc(:(Krylov.korgqr!($A, $tau))) -end - -macro kormqr!(side, trans, A, tau, C) - return esc(:(Krylov.kormqr!($side, $trans, $A, $tau, $C))) -end - -# the macros are just for readability, so we don't have to write the increments (always equal to 1) -macro kdot(n, x, y) - return esc(:(Krylov.kdot($n, $x, 1, $y, 1))) -end - -macro kdotr(n, x, y) - return esc(:(Krylov.kdotr($n, $x, 1, $y, 1))) -end +kref!(n, x, y, c, s) = reflect!(x, y, c, s) -macro knrm2(n, x) - return esc(:(Krylov.knrm2($n, $x, 1))) -end - -macro kscal!(n, s, x) - return esc(:(Krylov.kscal!($n, $s, $x, 1))) -end - -macro kaxpy!(n, s, x, y) - return esc(:(Krylov.kaxpy!($n, $s, $x, 1, $y, 1))) -end - -macro kaxpby!(n, s, x, t, y) - return esc(:(Krylov.kaxpby!($n, $s, $x, 1, $t, $y, 1))) -end - -macro kcopy!(n, y, x) - return esc(:(Krylov.kcopy!($n, $x, 1, $y, 1))) -end - -macro kfill!(x, val) - return esc(:(Krylov.kfill!($x, $val))) -end - -macro kswap(x, y) +macro kswap!(x, y) quote local tmp = $(esc(x)) $(esc(x)) = $(esc(y)) @@ -391,10 +348,6 @@ macro kswap(x, y) end end -macro kref!(n, x, y, c, s) - return esc(:(reflect!($x, $y, $c, $s))) -end - """ roots = to_boundary(n, x, d, radius; flip, xNorm2, dNorm2) @@ -416,9 +369,9 @@ function to_boundary(n :: Int, x :: AbstractVector{FC}, d :: AbstractVector{FC}, if M === I # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - Δ²). - rxd = @kdotr(n, x, d) - dNorm2 == zero(T) && (dNorm2 = @kdotr(n, d, d)) - xNorm2 == zero(T) && (xNorm2 = @kdotr(n, x, x)) + rxd = kdotr(n, x, d) + dNorm2 == zero(T) && (dNorm2 = kdotr(n, d, d)) + xNorm2 == zero(T) && (xNorm2 = kdotr(n, x, x)) else # (dᴴMd) σ² + (xᴴMd + dᴴMx) σ + (xᴴMx - Δ²). mulorldiv!(z, M, x, ldiv) diff --git a/src/lnlq.jl b/src/lnlq.jl index 1ca359b54..58a3cf611 100644 --- a/src/lnlq.jl +++ b/src/lnlq.jl @@ -198,10 +198,10 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly complex_error_bnd = false # Initial solutions (x₀, y₀) and residual norm ‖r₀‖. - @kfill!(x, zero(FC)) - @kfill!(y, zero(FC)) + kfill!(x, zero(FC)) + kfill!(y, zero(FC)) - bNorm = @knrm2(m, b) + bNorm = knorm(m, b) if bNorm == 0 stats.niter = 0 stats.solved = true @@ -226,35 +226,35 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly # Initialize generalized Golub-Kahan bidiagonalization. # β₁Mu₁ = b. - @kcopy!(m, Mu, b) # Mu ← b + kcopy!(m, Mu, b) # Mu ← b MisI || mulorldiv!(u, M, Mu, ldiv) # u₁ = M⁻¹ * Mu₁ - βₖ = sqrt(@kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M + βₖ = sqrt(kdotr(m, u, Mu)) # β₁ = ‖u₁‖_M if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, u) - MisI || @kscal!(m, one(FC) / βₖ, Mu) + kscal!(m, one(FC) / βₖ, u) + MisI || kscal!(m, one(FC) / βₖ, Mu) end # α₁Nv₁ = Aᴴu₁. mul!(Aᴴu, Aᴴ, u) - @kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu + kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁ - αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N + αₖ = sqrt(kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N if αₖ ≠ 0 - @kscal!(n, one(FC) / αₖ, v) - NisI || @kscal!(n, one(FC) / αₖ, Nv) + kscal!(n, one(FC) / αₖ, v) + NisI || kscal!(n, one(FC) / αₖ, Nv) end - @kcopy!(m, w̄, u) # Direction w̄₁ + kcopy!(m, w̄, u) # Direction w̄₁ cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᴴ sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᴴ ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ ηₖ = zero(FC) # Coefficient of M̅ₖ # Variable used for the regularization. - λₖ = λ # λ₁ = λ - cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ - cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁ - λ > 0 && @kcopy!(n, q, v) # Additional vector needed to update x, by definition q₀ = 0 + λₖ = λ # λ₁ = λ + cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ + cdₖ = sdₖ = one(FC) # Givens sines and cosines used to define λₖ₊₁ + λ > 0 && kcopy!(n, q, v) # Additional vector needed to update x, by definition q₀ = 0 # Initialize the regularization. if λ > 0 @@ -264,7 +264,7 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly (cpₖ, spₖ, αhatₖ) = sym_givens(αₖ, λₖ) # q̄₁ = sp₁ * v₁ - @kscal!(n, spₖ, q) + kscal!(n, spₖ, q) else αhatₖ = αₖ end @@ -316,15 +316,15 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly # Update of (xᵃᵘˣ)ₖ = Vₖtₖ if λ > 0 # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, τₖ * cpₖ, v, x) + kaxpy!(n, τₖ * cpₖ, v, x) if iter ≥ 2 - @kaxpy!(n, τₖ * spₖ, q, x) + kaxpy!(n, τₖ * spₖ, q, x) # q̄ₖ ← spₖ * vₖ - cpₖ * qₖ₋₁ - @kaxpby!(n, spₖ, v, -cpₖ, q) + kaxpby!(n, spₖ, v, -cpₖ, q) end else # (xᵃᵘˣ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ - @kaxpy!(n, τₖ, v, x) + kaxpy!(n, τₖ, v, x) end # Continue the generalized Golub-Kahan bidiagonalization. @@ -344,22 +344,22 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly # βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -αₖ, Mu) + kaxpby!(m, one(FC), Av, -αₖ, Mu) MisI || mulorldiv!(u, M, Mu, ldiv) # uₖ₊₁ = M⁻¹ * Muₖ₊₁ - βₖ₊₁ = sqrt(@kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M + βₖ₊₁ = sqrt(kdotr(m, u, Mu)) # βₖ₊₁ = ‖uₖ₊₁‖_M if βₖ₊₁ ≠ 0 - @kscal!(m, one(FC) / βₖ₊₁, u) - MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu) + kscal!(m, one(FC) / βₖ₊₁, u) + MisI || kscal!(m, one(FC) / βₖ₊₁, Mu) end # αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ mul!(Aᴴu, Aᴴ, u) - @kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv) + kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁ - αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N + αₖ₊₁ = sqrt(kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N if αₖ₊₁ ≠ 0 - @kscal!(n, one(FC) / αₖ₊₁, v) - NisI || @kscal!(n, one(FC) / αₖ₊₁, Nv) + kscal!(n, one(FC) / αₖ₊₁, v) + NisI || kscal!(n, one(FC) / αₖ₊₁, Nv) end # Continue the regularization. @@ -376,7 +376,7 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, θₖ₊₁) # qₖ ← sdₖ * q̄ₖ - @kscal!(n, sdₖ, q) + kscal!(n, sdₖ, q) # k+1 2k+1 k+1 2k+1 k+1 2k+1 # k+1 [ αₖ₊₁ λₖ₊₁ ] [ cpₖ₊₁ spₖ₊₁ ] = [ αhatₖ₊₁ 0 ] @@ -428,11 +428,11 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly # [sₖ₊₁ -cₖ₊₁] → w̄ₖ₊₁ = sₖ₊₁ * w̄ₖ - cₖ₊₁ * uₖ₊₁ # (yᴸ)ₖ₊₁ ← (yᴸ)ₖ + ζₖ * wₖ - @kaxpy!(m, ζₖ * cₖ₊₁, w̄, y) - @kaxpy!(m, ζₖ * sₖ₊₁, u, y) + kaxpy!(m, ζₖ * cₖ₊₁, w̄, y) + kaxpy!(m, ζₖ * sₖ₊₁, u, y) # Compute w̄ₖ₊₁ - @kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄) + kaxpby!(m, -cₖ₊₁, u, sₖ₊₁, w̄) if σₑₛₜ > 0 && !complex_error_bnd if transfer_to_craig @@ -509,26 +509,26 @@ kwargs_lnlq = (:M, :N, :ldiv, :transfer_to_craig, :sqd, :λ, :σ, :utolx, :utoly if solved_cg if λ > 0 # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, τₖ * cpₖ, v, x) + kaxpy!(n, τₖ * cpₖ, v, x) if iter ≥ 2 - @kaxpy!(n, τₖ * spₖ, q, x) + kaxpy!(n, τₖ * spₖ, q, x) end else # (xᶜ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + τₖ * vₖ - @kaxpy!(n, τₖ, v, x) + kaxpy!(n, τₖ, v, x) end # (yᶜ)ₖ ← (yᴸ)ₖ₋₁ + ζbarₖ * w̄ₖ - @kaxpy!(m, ζbarₖ, w̄, y) + kaxpy!(m, ζbarₖ, w̄, y) else if λ > 0 # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * (cpₖvₖ + spₖqₖ₋₁) - @kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x) + kaxpy!(n, ηₖ * ζₖ₋₁ * cpₖ, v, x) if iter ≥ 2 - @kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x) + kaxpy!(n, ηₖ * ζₖ₋₁ * spₖ, q, x) end else # (xᴸ)ₖ ← (xᵃᵘˣ)ₖ₋₁ + ηₖζₖ₋₁ * vₖ - @kaxpy!(n, ηₖ * ζₖ₋₁, v, x) + kaxpy!(n, ηₖ * ζₖ₋₁, v, x) end end diff --git a/src/lslq.jl b/src/lslq.jl index 76ba2b052..c6b5bd42a 100644 --- a/src/lslq.jl +++ b/src/lslq.jl @@ -222,13 +222,13 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : λ² = λ * λ ctol = conlim > 0 ? 1/conlim : zero(T) - @kfill!(x, zero(FC)) # LSLQ point + kfill!(x, zero(FC)) # LSLQ point # Initialize Golub-Kahan process. # β₁ M u₁ = b. - @kcopy!(m, Mu, b) # Mu ← b + kcopy!(m, Mu, b) # Mu ← b MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) + β₁ = sqrt(kdotr(m, u, Mu)) if β₁ == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -241,12 +241,12 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : end β = β₁ - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) + kscal!(m, one(FC)/β₁, u) + MisI || kscal!(m, one(FC)/β₁, Mu) mul!(Aᴴu, Aᴴ, u) - @kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu + kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) # = α₁ + α = sqrt(kdotr(n, v, Nv)) # = α₁ # Aᴴb = 0 so x = 0 is a minimum least-squares solution if α == 0 @@ -259,8 +259,8 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : stats.status = "x = 0 is a minimum least-squares solution" return solver end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) Anorm = α Anorm² = α * α @@ -275,11 +275,11 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : xcgNorm = zero(T) xcgNorm² = zero(T) - @kcopy!(n, w̄, v) # w̄₁ = v₁ + kcopy!(n, w̄, v) # w̄₁ = v₁ err_lbnd = zero(T) window = length(err_vec) - @kfill!(err_vec, zero(T)) + kfill!(err_vec, zero(T)) complex_error_bnd = false # Initialize other constants. @@ -324,21 +324,21 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : # Generate next Golub-Kahan vectors. # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + kaxpby!(m, one(FC), Av, -α, Mu) MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) + β = sqrt(kdotr(m, u, Mu)) if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) + kscal!(m, one(FC)/β, u) + MisI || kscal!(m, one(FC)/β, Mu) # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ mul!(Aᴴu, Aᴴ, u) - @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) + α = sqrt(kdotr(n, v, Nv)) if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) end # rotate out regularization term if present @@ -428,11 +428,11 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : tol = btol + atol * Anorm * xlqNorm / β₁ # update LSLQ point for next iteration - @kaxpy!(n, c * ζ, w̄, x) - @kaxpy!(n, s * ζ, v, x) + kaxpy!(n, c * ζ, w̄, x) + kaxpy!(n, s * ζ, v, x) # compute w̄ - @kaxpby!(n, -c, v, s, w̄) + kaxpby!(n, -c, v, s, w̄) xlqNorm² += ζ * ζ xlqNorm = sqrt(xlqNorm²) @@ -440,7 +440,7 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : # check stopping condition based on forward error lower bound err_vec[mod(iter, window) + 1] = ζ if iter ≥ window - err_lbnd = @knrm2(window, err_vec) + err_lbnd = knorm(window, err_vec) history && push!(err_lbnds, err_lbnd) fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm end @@ -479,7 +479,7 @@ kwargs_lslq = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :σ, :etol, :utol, : (verbose > 0) && @printf(iostream, "\n") if transfer_to_lsqr # compute LSQR point - @kaxpy!(n, ζ̄ , w̄, x) + kaxpy!(n, ζ̄ , w̄, x) end # Termination status diff --git a/src/lsmr.jl b/src/lsmr.jl index 36db8b5af..14bb646c7 100644 --- a/src/lsmr.jl +++ b/src/lsmr.jl @@ -196,13 +196,13 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, v = NisI ? Nv : solver.v ctol = conlim > 0 ? 1/conlim : zero(T) - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) # Initialize Golub-Kahan process. # β₁ M u₁ = b. - @kcopy!(m, Mu, b) # Mu ← b + kcopy!(m, Mu, b) # Mu ← b MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) + β₁ = sqrt(kdotr(m, u, Mu)) if β₁ == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -214,12 +214,12 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, end β = β₁ - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) + kscal!(m, one(FC)/β₁, u) + MisI || kscal!(m, one(FC)/β₁, Mu) mul!(Aᴴu, Aᴴ, u) - @kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu + kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) + α = sqrt(kdotr(n, v, Nv)) ζbar = α * β αbar = α @@ -255,7 +255,7 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, xENorm² = zero(T) err_lbnd = zero(T) window = length(err_vec) - @kfill!(err_vec, zero(T)) + kfill!(err_vec, zero(T)) iter = 0 itmax == 0 && (itmax = m + n) @@ -271,11 +271,11 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, stats.status = "x = 0 is a minimum least-squares solution" return solver end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) - @kcopy!(n, h, v) # h ← v - @kfill!(hbar, zero(FC)) + kcopy!(n, h, v) # h ← v + kfill!(hbar, zero(FC)) status = "unknown" on_boundary = false @@ -293,21 +293,21 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, # Generate next Golub-Kahan vectors. # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + kaxpby!(m, one(FC), Av, -α, Mu) MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) + β = sqrt(kdotr(m, u, Mu)) if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) + kscal!(m, one(FC)/β, u) + MisI || kscal!(m, one(FC)/β, Mu) # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ mul!(Aᴴu, Aᴴ, u) - @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) + α = sqrt(kdotr(n, v, Nv)) if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) end end @@ -329,11 +329,11 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, xENorm² = xENorm² + ζ * ζ err_vec[mod(iter, window) + 1] = ζ - iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + iter ≥ window && (err_lbnd = knorm(window, err_vec)) # Update h, hbar and x. - δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁) - @kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁ + δ = θbar * ρ / (ρold * ρbarold) # δₖ = θbarₖ * ρₖ / (ρₖ₋₁ * ρbarₖ₋₁) + kaxpby!(n, one(FC), h, -δ, hbar) # ĥₖ = hₖ - δₖ * ĥₖ₋₁ # if a trust-region constraint is given, compute step to the boundary # the step ϕ/ρ is not necessarily positive @@ -345,8 +345,8 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) end - @kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ - @kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ + kaxpy!(n, σ, hbar, x) # xₖ = xₖ₋₁ + σₖ * ĥₖ + kaxpby!(n, one(FC), v, -θnew / ρ, h) # hₖ₊₁ = vₖ₊₁ - (θₖ₊₁/ρₖ) * hₖ # Estimate ‖r‖. βacute = chat * βdd @@ -380,7 +380,7 @@ kwargs_lsmr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, # Test for convergence. ArNorm = abs(ζbar) history && push!(ArNorms, ArNorm) - xNorm = @knrm2(n, x) + xNorm = knorm(n, x) test1 = rNorm / β₁ test2 = ArNorm / (Anorm * rNorm) diff --git a/src/lsqr.jl b/src/lsqr.jl index 120174105..e1493dcce 100644 --- a/src/lsqr.jl +++ b/src/lsqr.jl @@ -193,13 +193,13 @@ kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, λ² = λ * λ ctol = conlim > 0 ? 1/conlim : zero(T) - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) # Initialize Golub-Kahan process. # β₁ M u₁ = b. - @kcopy!(m, Mu, b) # Mu ← b + kcopy!(m, Mu, b) # Mu ← b MisI || mulorldiv!(u, M, Mu, ldiv) - β₁ = sqrt(@kdotr(m, u, Mu)) + β₁ = sqrt(kdotr(m, u, Mu)) if β₁ == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -211,12 +211,12 @@ kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, end β = β₁ - @kscal!(m, one(FC)/β₁, u) - MisI || @kscal!(m, one(FC)/β₁, Mu) + kscal!(m, one(FC)/β₁, u) + MisI || kscal!(m, one(FC)/β₁, Mu) mul!(Aᴴu, Aᴴ, u) - @kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu + kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) - Anorm² = @kdotr(n, v, Nv) + Anorm² = kdotr(n, v, Nv) Anorm = sqrt(Anorm²) α = Anorm Acond = zero(T) @@ -230,7 +230,7 @@ kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, xENorm² = zero(T) err_lbnd = zero(T) window = length(err_vec) - @kfill!(err_vec, zero(T)) + kfill!(err_vec, zero(T)) iter = 0 itmax == 0 && (itmax = m + n) @@ -253,9 +253,9 @@ kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, stats.status = "x = 0 is a minimum least-squares solution" return solver end - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) - @kcopy!(n, w, v) # w ← v + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) + kcopy!(n, w, v) # w ← v # Initialize other constants. ϕbar = β₁ @@ -281,23 +281,23 @@ kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, # Generate next Golub-Kahan vectors. # 1. βₖ₊₁Muₖ₊₁ = Avₖ - αₖMuₖ mul!(Av, A, v) - @kaxpby!(m, one(FC), Av, -α, Mu) + kaxpby!(m, one(FC), Av, -α, Mu) MisI || mulorldiv!(u, M, Mu, ldiv) - β = sqrt(@kdotr(m, u, Mu)) + β = sqrt(kdotr(m, u, Mu)) if β ≠ 0 - @kscal!(m, one(FC)/β, u) - MisI || @kscal!(m, one(FC)/β, Mu) + kscal!(m, one(FC)/β, u) + MisI || kscal!(m, one(FC)/β, Mu) Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖² λ > 0 && (Anorm² += λ²) # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ mul!(Aᴴu, Aᴴ, u) - @kaxpby!(n, one(FC), Aᴴu, -β, Nv) + kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) - α = sqrt(@kdotr(n, v, Nv)) + α = sqrt(kdotr(n, v, Nv)) if α ≠ 0 - @kscal!(n, one(FC)/α, v) - NisI || @kscal!(n, one(FC)/α, Nv) + kscal!(n, one(FC)/α, v) + NisI || kscal!(n, one(FC)/α, Nv) end end @@ -325,12 +325,12 @@ kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, xENorm² = xENorm² + ϕ * ϕ err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + iter ≥ window && (err_lbnd = knorm(window, err_vec)) τ = s * ϕ θ = s * α ρbar = -c * α - dNorm² += @kdotr(n, w, w) / ρ^2 + dNorm² += kdotr(n, w, w) / ρ^2 # if a trust-region constraint is give, compute step to the boundary # the step ϕ/ρ is not necessarily positive @@ -342,8 +342,8 @@ kwargs_lsqr = (:M, :N, :ldiv, :sqd, :λ, :radius, :etol, :axtol, :btol, :conlim, σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) end - @kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w - @kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w + kaxpy!(n, σ, w, x) # x = x + ϕ / ρ * w + kaxpby!(n, one(FC), v, -θ/ρ, w) # w = v - θ / ρ * w # Use a plane rotation on the right to eliminate the super-diagonal # element (θ) of the upper-bidiagonal matrix. diff --git a/src/minares.jl b/src/minares.jl index 1a185f46f..cce6baa41 100644 --- a/src/minares.jl +++ b/src/minares.jl @@ -131,33 +131,33 @@ kwargs_minares = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbo iter = 0 itmax == 0 && (itmax = 2*n) - @kfill!(x, zero(FC)) # x₀ + kfill!(x, zero(FC)) # x₀ # Initialize the Lanczos process. # β₁v₁ = r₀ if warm_start mul!(vₖ, A, Δx) # r₀ = b - Ax₀ - (λ ≠ 0) && @kaxpy!(n, λ, Δx, vₖ) - @kaxpby!(n, one(FC), b, -one(FC), vₖ) + (λ ≠ 0) && kaxpy!(n, λ, Δx, vₖ) + kaxpby!(n, one(FC), b, -one(FC), vₖ) else - @kcopy!(n, vₖ, b) # r₀ = b + kcopy!(n, vₖ, b) # r₀ = b end - βₖ = @knrm2(n, vₖ) # β₁ = ‖v₁‖ + βₖ = knorm(n, vₖ) # β₁ = ‖v₁‖ if βₖ ≠ 0 - @kscal!(n, one(FC) / βₖ, vₖ) + kscal!(n, one(FC) / βₖ, vₖ) end β₁ = βₖ # β₂v₂ = (A + λI)v₁ - α₁v₁ mul!(vₖ₊₁, A, vₖ) if λ ≠ 0 - @kaxpy!(n, λ, vₖ, vₖ₊₁) + kaxpy!(n, λ, vₖ, vₖ₊₁) end - αₖ = @kdotr(n, vₖ, vₖ₊₁) # α₁ = (vₖ)ᵀ(A + λI)vₖ - @kaxpy!(n, -αₖ, vₖ, vₖ₊₁) - βₖ₊₁ = @knrm2(n, vₖ₊₁) # β₂ = ‖v₂‖ + αₖ = kdotr(n, vₖ, vₖ₊₁) # α₁ = (vₖ)ᵀ(A + λI)vₖ + kaxpy!(n, -αₖ, vₖ, vₖ₊₁) + βₖ₊₁ = knorm(n, vₖ₊₁) # β₂ = ‖v₂‖ if βₖ₊₁ ≠ 0 - @kscal!(n, one(FC) / βₖ₊₁, vₖ₊₁) + kscal!(n, one(FC) / βₖ₊₁, vₖ₊₁) end ξₖ₋₁ = zero(T) @@ -173,10 +173,10 @@ kwargs_minares = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbo c̃₂ₖ₋₂ = s̃₂ₖ₋₂ = zero(T) c̃₂ₖ₋₁ = s̃₂ₖ₋₁ = zero(T) c̃₂ₖ = s̃₂ₖ = zero(T) - @kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ - @kfill!(wₖ₋₁, zero(FC)) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ - @kfill!(dₖ₋₂, zero(FC)) # Column k-2 of Dₖ = Wₖ(Uₖ)⁻¹ - @kfill!(dₖ₋₁, zero(FC)) # Column k-1 of Dₖ = Wₖ(Uₖ)⁻¹ + kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ + kfill!(wₖ₋₁, zero(FC)) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ + kfill!(dₖ₋₂, zero(FC)) # Column k-2 of Dₖ = Wₖ(Uₖ)⁻¹ + kfill!(dₖ₋₁, zero(FC)) # Column k-1 of Dₖ = Wₖ(Uₖ)⁻¹ β₁α₁ = βₖ * αₖ # Variable used to update zₖ β₁β₂ = βₖ * βₖ₊₁ # Variable used to update zₖ ϵₖ₋₂ = ϵₖ₋₁ = zero(T) @@ -241,23 +241,23 @@ kwargs_minares = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbo # w₁ = v₁ / λ₁ if iter == 1 wₖ = wₖ₋₁ - @kaxpy!(n, one(T), vₖ, wₖ) - @kscal!(n, one(T) / λₖ, wₖ) + kaxpy!(n, one(T), vₖ, wₖ) + kscal!(n, one(T) / λₖ, wₖ) end # w₂ = (v₂ - γ₁w₁) / λ₂ if iter == 2 wₖ = wₖ₋₂ - @kaxpy!(n, -γₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(T), vₖ, wₖ) - @kscal!(n, one(T) / λₖ, wₖ) + kaxpy!(n, -γₖ₋₁, wₖ₋₁, wₖ) + kaxpy!(n, one(T), vₖ, wₖ) + kscal!(n, one(T) / λₖ, wₖ) end # wₖ = (vₖ - γₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / λₖ if iter ≥ 3 - @kscal!(n, -ϵₖ₋₂, wₖ₋₂) + kscal!(n, -ϵₖ₋₂, wₖ₋₂) wₖ = wₖ₋₂ - @kaxpy!(n, -γₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(T), vₖ, wₖ) - @kscal!(n, one(T) / λₖ, wₖ) + kaxpy!(n, -γₖ₋₁, wₖ₋₁, wₖ) + kaxpy!(n, one(T), vₖ, wₖ) + kscal!(n, one(T) / λₖ, wₖ) end # Continue the Lanczos process. @@ -265,19 +265,19 @@ kwargs_minares = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbo # βₖ₊₂vₖ₊₂ = M(A + λI)vₖ₊₁ - αₖ₊₁vₖ₊₁ - βₖ₊₁vₖ if iter ≤ ℓ-1 mul!(q, A, vₖ₊₁) # q ← Avₖ - @kaxpby!(n, one(T), q, -βₖ₊₁, vₖ) # Forms vₖ₊₂ : vₖ ← Avₖ₊₁ - βₖ₊₁vₖ + kaxpby!(n, one(T), q, -βₖ₊₁, vₖ) # Forms vₖ₊₂ : vₖ ← Avₖ₊₁ - βₖ₊₁vₖ if λ ≠ 0 - @kaxpy!(n, λ, vₖ₊₁, vₖ) # vₖ ← vₖ + λvₖ₊₁ + kaxpy!(n, λ, vₖ₊₁, vₖ) # vₖ ← vₖ + λvₖ₊₁ end - αₖ₊₁ = @kdotr(n, vₖ, vₖ₊₁) # αₖ₊₁ = ⟨(A + λI)vₖ₊₁ - βₖ₊₁vₖ , vₖ₊₁⟩ - @kaxpy!(n, -αₖ₊₁, vₖ₊₁, vₖ) # vₖ ← vₖ - αₖ₊₁vₖ₊₁ - βₖ₊₂ = @knrm2(n, vₖ) # βₖ₊₂ = ‖vₖ₊₂‖ + αₖ₊₁ = kdotr(n, vₖ, vₖ₊₁) # αₖ₊₁ = ⟨(A + λI)vₖ₊₁ - βₖ₊₁vₖ , vₖ₊₁⟩ + kaxpy!(n, -αₖ₊₁, vₖ₊₁, vₖ) # vₖ ← vₖ - αₖ₊₁vₖ₊₁ + βₖ₊₂ = knorm(n, vₖ) # βₖ₊₂ = ‖vₖ₊₂‖ # Detection of early termination if βₖ₊₂ ≤ btol ℓ = iter + 1 else - @kscal!(n, one(FC) / βₖ₊₂, vₖ) + kscal!(n, one(FC) / βₖ₊₂, vₖ) end end @@ -388,27 +388,27 @@ kwargs_minares = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbo # d₁ = w₁ / μ₁ if iter == 1 dₖ = dₖ₋₁ - @kaxpy!(n, one(T), wₖ, dₖ) - @kscal!(n, one(T) / μₖ, dₖ) + kaxpy!(n, one(T), wₖ, dₖ) + kscal!(n, one(T) / μₖ, dₖ) end # d₂ = (w₂ - ϕ₁d₁) / μ₂ if iter == 2 dₖ = dₖ₋₂ - @kaxpy!(n, -ϕₖ₋₁, dₖ₋₁, dₖ) - @kaxpy!(n, one(T), wₖ, dₖ) - @kscal!(n, one(T) / μₖ, dₖ) + kaxpy!(n, -ϕₖ₋₁, dₖ₋₁, dₖ) + kaxpy!(n, one(T), wₖ, dₖ) + kscal!(n, one(T) / μₖ, dₖ) end # dₖ = (wₖ - ϕₖ₋₁dₖ₋₁ - ρₖ₋₂dₖ₋₂) / μₖ if iter ≥ 3 - @kscal!(n, -ρₖ₋₂, dₖ₋₂) + kscal!(n, -ρₖ₋₂, dₖ₋₂) dₖ = dₖ₋₂ - @kaxpy!(n, -ϕₖ₋₁, dₖ₋₁, dₖ) - @kaxpy!(n, one(T), wₖ, dₖ) - @kscal!(n, one(T) / μₖ, dₖ) + kaxpy!(n, -ϕₖ₋₁, dₖ₋₁, dₖ) + kaxpy!(n, one(T), wₖ, dₖ) + kscal!(n, one(T) / μₖ, dₖ) end # x = Vₖyₖ = Dₖzₖ = x₋₁ + ζₖdₖ - @kaxpy!(n, ζₖ, dₖ, x) + kaxpy!(n, ζₖ, dₖ, x) # Update ‖Arₖ‖ estimate (iter ≤ ℓ-2) && (ArNorm = sqrt(ζbisₖ₊₁^2 + ζbarₖ₊₂^2)) @@ -528,10 +528,10 @@ kwargs_minares = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbo user_requested_exit = callback(solver) :: Bool # Update variables - @kswap(vₖ, vₖ₊₁) + @kswap!(vₖ, vₖ₊₁) if iter ≥ 2 - @kswap(wₖ₋₂, wₖ₋₁) - @kswap(dₖ₋₂, dₖ₋₁) + @kswap!(wₖ₋₂, wₖ₋₁) + @kswap!(dₖ₋₂, dₖ₋₁) ϵₖ₋₂ = ϵₖ₋₁ c̃₂ₖ₋₄ = c̃₂ₖ₋₂ s̃₂ₖ₋₄ = s̃₂ₖ₋₂ @@ -572,7 +572,7 @@ kwargs_minares = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :verbo overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/minres.jl b/src/minres.jl index 472a8cf2d..2b5f6ce51 100644 --- a/src/minres.jl +++ b/src/minres.jl @@ -159,21 +159,21 @@ kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, ctol = conlim > 0 ? 1 / conlim : zero(T) # Initial solution x₀ - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) if warm_start mul!(r1, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, r1) - @kaxpby!(n, one(FC), b, -one(FC), r1) + (λ ≠ 0) && kaxpy!(n, λ, Δx, r1) + kaxpby!(n, one(FC), b, -one(FC), r1) else - @kcopy!(n, r1, b) # r1 ← b + kcopy!(n, r1, b) # r1 ← b end # Initialize Lanczos process. # β₁ M v₁ = b. - @kcopy!(n, r2, r1) # r2 ← r1 + kcopy!(n, r2, r1) # r2 ← r1 MisI || mulorldiv!(v, M, r1, ldiv) - β₁ = @kdotr(m, r1, v) + β₁ = kdotr(m, r1, v) β₁ < 0 && error("Preconditioner is not positive definite") if β₁ == 0 stats.niter = 0 @@ -201,8 +201,8 @@ kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, γmin = T(Inf) cs = -one(T) sn = zero(T) - @kfill!(w1, zero(FC)) - @kfill!(w2, zero(FC)) + kfill!(w1, zero(FC)) + kfill!(w2, zero(FC)) ANorm² = zero(T) ANorm = zero(T) @@ -215,7 +215,7 @@ kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, xENorm² = zero(T) err_lbnd = zero(T) window = length(err_vec) - @kfill!(err_vec, zero(T)) + kfill!(err_vec, zero(T)) iter = 0 itmax == 0 && (itmax = 2*n) @@ -237,29 +237,29 @@ kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, # Generate next Lanczos vector. mul!(y, A, v) - λ ≠ 0 && @kaxpy!(n, λ, v, y) # (y = y + λ * v) - @kscal!(n, one(FC) / β, y) - iter ≥ 2 && @kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1) + λ ≠ 0 && kaxpy!(n, λ, v, y) # (y = y + λ * v) + kscal!(n, one(FC) / β, y) + iter ≥ 2 && kaxpy!(n, -β / oldβ, r1, y) # (y = y - β / oldβ * r1) - α = @kdotr(n, v, y) / β - @kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2 + α = kdotr(n, v, y) / β + kaxpy!(n, -α / β, r2, y) # y = y - α / β * r2 # Compute w. δ = cs * δbar + sn * α if iter == 1 w = w2 else - iter ≥ 3 && @kscal!(n, -ϵ, w1) + iter ≥ 3 && kscal!(n, -ϵ, w1) w = w1 - @kaxpy!(n, -δ, w2, w) + kaxpy!(n, -δ, w2, w) end - @kaxpy!(n, one(FC) / β, v, w) + kaxpy!(n, one(FC) / β, v, w) - @kcopy!(n, r1, r2) # r1 ← r2 - @kcopy!(n, r2, y) # r2 ← y + kcopy!(n, r1, r2) # r1 ← r2 + kcopy!(n, r2, y) # r2 ← y MisI || mulorldiv!(v, M, r2, ldiv) oldβ = β - β = @kdotr(n, r2, v) + β = kdotr(n, r2, v) β < 0 && error("Preconditioner is not positive definite") β = sqrt(β) ANorm² = ANorm² + α * α + oldβ * oldβ + β * β @@ -283,20 +283,20 @@ kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, ϕbar = sn * ϕbar # Final update of w. - @kscal!(n, one(FC) / γ, w) + kscal!(n, one(FC) / γ, w) # Update x. - @kaxpy!(n, ϕ, w, x) # x = x + ϕ * w + kaxpy!(n, ϕ, w, x) # x = x + ϕ * w xENorm² = xENorm² + ϕ * ϕ # Update directions for x. if iter ≥ 2 - @kswap(w1, w2) + @kswap!(w1, w2) end # Compute lower bound on forward error. err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) + iter ≥ window && (err_lbnd = knorm(window, err_vec)) γmax = max(γmax, γ) γmin = min(γmin, γ) @@ -306,7 +306,7 @@ kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, # Estimate various norms. ANorm = sqrt(ANorm²) - xNorm = @knrm2(n, x) + xNorm = knorm(n, x) ϵA = ANorm * ϵM ϵx = ANorm * xNorm * ϵM ϵr = ANorm * xNorm * rtol @@ -371,7 +371,7 @@ kwargs_minres = (:M, :ldiv, :λ, :atol, :rtol, :etol, :conlim, :itmax, :timemax, overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl index b664965c1..085f7d728 100644 --- a/src/minres_qlp.jl +++ b/src/minres_qlp.jl @@ -139,22 +139,22 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve vₖ₊₁ = MisI ? p : M⁻¹vₖ₋₁ # Initial solution x₀ - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) if warm_start mul!(M⁻¹vₖ, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, M⁻¹vₖ) - @kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ) + (λ ≠ 0) && kaxpy!(n, λ, Δx, M⁻¹vₖ) + kaxpby!(n, one(FC), b, -one(FC), M⁻¹vₖ) else - @kcopy!(n, M⁻¹vₖ, b) # M⁻¹vₖ ← b + kcopy!(n, M⁻¹vₖ, b) # M⁻¹vₖ ← b end # β₁v₁ = Mb MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(n, vₖ, M⁻¹vₖ)) + βₖ = sqrt(kdotr(n, vₖ, M⁻¹vₖ)) if βₖ ≠ 0 - @kscal!(n, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(n, one(FC) / βₖ, vₖ) + kscal!(n, one(FC) / βₖ, M⁻¹vₖ) + MisI || kscal!(n, one(FC) / βₖ, vₖ) end rNorm = βₖ @@ -183,14 +183,14 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s %.2fs\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗", ktimer(start_time)) # Set up workspace. - @kfill!(M⁻¹vₖ₋₁, zero(FC)) + kfill!(M⁻¹vₖ₋₁, zero(FC)) ζbarₖ = βₖ ξₖ₋₁ = zero(T) τₖ₋₂ = τₖ₋₁ = τₖ = zero(T) ψbarₖ₋₂ = zero(T) μbisₖ₋₂ = μbarₖ₋₁ = zero(T) - @kfill!(wₖ₋₁, zero(FC)) - @kfill!(wₖ, zero(FC)) + kfill!(wₖ₋₁, zero(FC)) + kfill!(wₖ, zero(FC)) cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ @@ -216,27 +216,27 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve # M(A + λI)Vₖ = Vₖ₊₁Tₖ₊₁.ₖ # βₖ₊₁vₖ₊₁ = M(A + λI)vₖ - αₖvₖ - βₖvₖ₋₁ - mul!(p, A, vₖ) # p ← Avₖ + mul!(p, A, vₖ) # p ← Avₖ if λ ≠ 0 - @kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ + kaxpy!(n, λ, vₖ, p) # p ← p + λvₖ end if iter ≥ 2 - @kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁ + kaxpy!(n, -βₖ, M⁻¹vₖ₋₁, p) # p ← p - βₖ * M⁻¹vₖ₋₁ end - αₖ = @kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩ + αₖ = kdotr(n, vₖ, p) # αₖ = ⟨vₖ,p⟩ - @kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ + kaxpy!(n, -αₖ, M⁻¹vₖ, p) # p ← p - αₖM⁻¹vₖ MisI || mulorldiv!(vₖ₊₁, M, p, ldiv) # βₖ₊₁vₖ₊₁ = MAvₖ - γₖvₖ₋₁ - αₖvₖ - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, p)) + βₖ₊₁ = sqrt(kdotr(m, vₖ₊₁, p)) # βₖ₊₁.ₖ ≠ 0 if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) - MisI || @kscal!(m, one(FC) / βₖ₊₁, p) + kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + MisI || kscal!(m, one(FC) / βₖ₊₁, p) end ANorm² = ANorm² + αₖ * αₖ + βₖ * βₖ + βₖ₊₁ * βₖ₊₁ @@ -348,13 +348,13 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᴴ if iter == 1 # w̅₁ = v₁ - @kcopy!(n, wₖ, vₖ) + kcopy!(n, wₖ, vₖ) elseif iter == 2 # [w̅ₖ₋₁ vₖ] [cpₖ spₖ] = [ẘₖ₋₁ w̅ₖ] ⟷ ẘₖ₋₁ = cpₖ * w̅ₖ₋₁ + spₖ * vₖ # [spₖ -cpₖ] ⟷ w̅ₖ = spₖ * w̅ₖ₋₁ - cpₖ * vₖ - @kswap(wₖ₋₁, wₖ) + @kswap!(wₖ₋₁, wₖ) wₖ .= spₖ .* wₖ₋₁ .- cpₖ .* vₖ - @kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁) + kaxpby!(n, spₖ, vₖ, cpₖ, wₖ₋₁) else # [ẘₖ₋₂ w̄ₖ₋₁ vₖ] [cpₖ 0 spₖ] [1 0 0 ] = [wₖ₋₂ ẘₖ₋₁ w̄ₖ] ⟷ wₖ₋₂ = cpₖ * ẘₖ₋₂ + spₖ * vₖ # [ 0 1 0 ] [0 cdₖ sdₖ] ⟷ ẘₖ₋₁ = cdₖ * w̄ₖ₋₁ + sdₖ * (spₖ * ẘₖ₋₂ - cpₖ * vₖ) @@ -362,20 +362,20 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve ẘₖ₋₂ = wₖ₋₁ w̄ₖ₋₁ = wₖ # Update the solution x - @kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x) - @kaxpy!(n, spₖ * τₖ₋₂, vₖ, x) + kaxpy!(n, cpₖ * τₖ₋₂, ẘₖ₋₂, x) + kaxpy!(n, spₖ * τₖ₋₂, vₖ, x) # Compute wₐᵤₓ = spₖ * ẘₖ₋₂ - cpₖ * vₖ - @kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂) + kaxpby!(n, -cpₖ, vₖ, spₖ, ẘₖ₋₂) wₐᵤₓ = ẘₖ₋₂ # Compute ẘₖ₋₁ and w̄ₖ - @kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ) - @kswap(wₖ₋₁, wₖ) + kref!(n, w̄ₖ₋₁, wₐᵤₓ, cdₖ, sdₖ) + @kswap!(wₖ₋₁, wₖ) end # Update vₖ, M⁻¹vₖ₋₁, M⁻¹vₖ - MisI || @kcopy!(n, vₖ, vₖ₊₁) # vₖ ← vₖ₊₁ - @kcopy!(n, M⁻¹vₖ₋₁, M⁻¹vₖ) # M⁻¹vₖ₋₁ ← M⁻¹vₖ - @kcopy!(n, M⁻¹vₖ, p) # M⁻¹vₖ ← p + MisI || kcopy!(n, vₖ, vₖ₊₁) # vₖ ← vₖ₊₁ + kcopy!(n, M⁻¹vₖ₋₁, M⁻¹vₖ) # M⁻¹vₖ₋₁ ← M⁻¹vₖ + kcopy!(n, M⁻¹vₖ, p) # M⁻¹vₖ ← p # Update ‖rₖ‖ estimate # ‖ rₖ ‖ = |ζbarₖ₊₁| @@ -403,7 +403,7 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve end Acond = μmax / μmin history && push!(Aconds, Acond) - xNorm = @knrm2(n, x) + xNorm = knorm(n, x) backward = rNorm / (ANorm * xNorm) # Update stopping criterion. @@ -446,10 +446,10 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve # Finalize the update of x if iter ≥ 2 - @kaxpy!(n, τₖ₋₁, wₖ₋₁, x) + kaxpy!(n, τₖ₋₁, wₖ₋₁, x) end if !inconsistent - @kaxpy!(n, τₖ, wₖ, x) + kaxpy!(n, τₖ, wₖ, x) end # Termination status @@ -462,7 +462,7 @@ kwargs_minres_qlp = (:M, :ldiv, :λ, :atol, :rtol, :Artol, :itmax, :timemax, :ve overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/qmr.jl b/src/qmr.jl index fa82b6a27..a05c052c6 100644 --- a/src/qmr.jl +++ b/src/qmr.jl @@ -154,7 +154,7 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) end if !MisI mulorldiv!(solver.t, M, r₀, ldiv) @@ -162,8 +162,8 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist end # Initial solution x₀ and residual norm ‖r₀‖. - @kfill!(x, zero(FC)) - rNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ + kfill!(x, zero(FC)) + rNorm = knorm(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖ history && push!(rNorms, rNorm) if rNorm == 0 @@ -180,7 +180,7 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist itmax == 0 && (itmax = 2*n) # Initialize the Lanczos biorthogonalization process. - cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + cᴴb = kdot(n, c, r₀) # ⟨c,r₀⟩ if cᴴb == 0 stats.niter = 0 stats.solved = false @@ -197,16 +197,16 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) - @kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 + kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - @kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ - @kfill!(wₖ₋₁, zero(FC)) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ + kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ + kfill!(wₖ₋₁, zero(FC)) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ - τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate + τₖ = kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate # Stopping criterion. solved = rNorm ≤ ε @@ -234,17 +234,17 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist mul!(s, Aᴴ, Mᴴuₖ) NisI || mulorldiv!(p, Nᴴ, s, ldiv) - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ + kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ - αₖ = @kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ + αₖ = kdot(n, uₖ, q) # αₖ = ⟨uₖ,q⟩ - @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) - γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ + pᴴq = kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. # [ Oᵀ ] @@ -304,32 +304,32 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist # w₁ = v₁ / δ₁ if iter == 1 wₖ = wₖ₋₁ - @kaxpy!(n, one(FC), vₖ, wₖ) + kaxpy!(n, one(FC), vₖ, wₖ) wₖ .= wₖ ./ δₖ end # w₂ = (v₂ - λ₁w₁) / δ₂ if iter == 2 wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), vₖ, wₖ) + kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + kaxpy!(n, one(FC), vₖ, wₖ) wₖ .= wₖ ./ δₖ end # wₖ = (vₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ if iter ≥ 3 - @kscal!(n, -ϵₖ₋₂, wₖ₋₂) + kscal!(n, -ϵₖ₋₂, wₖ₋₂) wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), vₖ, wₖ) + kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + kaxpy!(n, one(FC), vₖ, wₖ) wₖ .= wₖ ./ δₖ end # Compute solution xₖ. # xₖ ← xₖ₋₁ + ζₖ * wₖ - @kaxpy!(n, ζₖ, wₖ, x) + kaxpy!(n, ζₖ, wₖ, x) # Compute vₖ₊₁ and uₖ₊₁. - @kcopy!(n, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ - @kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ + kcopy!(n, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ + kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ if pᴴq ≠ zero(FC) vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @@ -337,7 +337,7 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist end # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖² - τₖ₊₁ = τₖ + @kdotr(n, vₖ, vₖ) + τₖ₊₁ = τₖ + kdotr(n, vₖ, vₖ) # Compute ‖rₖ‖ ≤ |ζbarₖ₊₁|√τₖ₊₁ rNorm = abs(ζbarₖ₊₁) * √τₖ₊₁ @@ -345,7 +345,7 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist # Update directions for x. if iter ≥ 2 - @kswap(wₖ₋₂, wₖ₋₁) + @kswap!(wₖ₋₂, wₖ₋₁) end # Update ζbarₖ, βₖ, γₖ and τₖ. @@ -382,7 +382,7 @@ kwargs_qmr = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist copyto!(solver.s, x) mulorldiv!(x, N, solver.s, ldiv) end - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/symmlq.jl b/src/symmlq.jl index de3cf34f9..d19559c42 100644 --- a/src/symmlq.jl +++ b/src/symmlq.jl @@ -147,20 +147,20 @@ kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, : ctol = conlim > 0 ? 1 / conlim : zero(T) # Initial solution x₀ - @kfill!(x, zero(FC)) + kfill!(x, zero(FC)) if warm_start mul!(Mvold, A, Δx) - (λ ≠ 0) && @kaxpy!(n, λ, Δx, Mvold) - @kaxpby!(n, one(FC), b, -one(FC), Mvold) + (λ ≠ 0) && kaxpy!(n, λ, Δx, Mvold) + kaxpby!(n, one(FC), b, -one(FC), Mvold) else - @kcopy!(n, Mvold, b) # Mvold ← b + kcopy!(n, Mvold, b) # Mvold ← b end # Initialize Lanczos process. # β₁ M v₁ = b. MisI || mulorldiv!(vold, M, Mvold, ldiv) - β₁ = @kdotr(m, vold, Mvold) + β₁ = kdotr(m, vold, Mvold) if β₁ == 0 stats.niter = 0 stats.solved = true @@ -175,20 +175,20 @@ kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, : end β₁ = sqrt(β₁) β = β₁ - @kscal!(m, one(FC) / β, vold) - MisI || @kscal!(m, one(FC) / β, Mvold) + kscal!(m, one(FC) / β, vold) + MisI || kscal!(m, one(FC) / β, Mvold) - @kcopy!(n, w̅, vold) # w̅ ← vold + kcopy!(n, w̅, vold) # w̅ ← vold mul!(Mv, A, vold) - α = @kdotr(m, vold, Mv) + λ - @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold + α = kdotr(m, vold, Mv) + λ + kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold MisI || mulorldiv!(v, M, Mv, ldiv) - β = @kdotr(m, v, Mv) + β = kdotr(m, v, Mv) β < 0 && error("Preconditioner is not positive definite") β = sqrt(β) - @kscal!(m, one(FC) / β, v) - MisI || @kscal!(m, one(FC) / β, Mv) + kscal!(m, one(FC) / β, v) + MisI || kscal!(m, one(FC) / β, Mv) # Start QR factorization γbar = α @@ -225,9 +225,9 @@ kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, : errcg = T(Inf) window = length(clist) - @kfill!(clist, zero(T)) - @kfill!(zlist, zero(T)) - @kfill!(sprod, one(T)) + kfill!(clist, zero(T)) + kfill!(zlist, zero(T)) + kfill!(sprod, one(T)) if λest ≠ 0 # Start QR factorization of Tₖ - λest I @@ -238,7 +238,7 @@ kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, : cw = ρbar / ρ sw = β / ρ - history && push!(errors, abs(β₁/λest)) + history && push!(errors, abs(β₁ / λest)) if γbar ≠ 0 history && push!(errorscg, sqrt(errors[1]^2 - ζbar^2)) else @@ -272,25 +272,25 @@ kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, : # Update SYMMLQ point ηold = η ζ = ηold / γ - @kaxpy!(n, c * ζ, w̅, x) - @kaxpy!(n, s * ζ, v, x) + kaxpy!(n, c * ζ, w̅, x) + kaxpy!(n, s * ζ, v, x) # Update w̅ - @kaxpby!(n, -c, v, s, w̅) + kaxpby!(n, -c, v, s, w̅) # Generate next Lanczos vector oldβ = β mul!(Mv_next, A, v) - α = @kdotr(m, v, Mv_next) + λ - @kaxpy!(m, -oldβ, Mvold, Mv_next) - @kcopy!(m, Mvold, Mv) # Mvold ← Mv - @kaxpy!(m, -α, Mv, Mv_next) - @kcopy!(m, Mv, Mv_next) # Mv ← Mv_next + α = kdotr(m, v, Mv_next) + λ + kaxpy!(m, -oldβ, Mvold, Mv_next) + kcopy!(m, Mvold, Mv) # Mvold ← Mv + kaxpy!(m, -α, Mv, Mv_next) + kcopy!(m, Mv, Mv_next) # Mv ← Mv_next MisI || mulorldiv!(v, M, Mv, ldiv) - β = @kdotr(m, v, Mv) + β = kdotr(m, v, Mv) β < 0 && error("Preconditioner is not positive definite") β = sqrt(β) - @kscal!(m, one(FC) / β, v) - MisI || @kscal!(m, one(FC) / β, Mv) + kscal!(m, one(FC) / β, v) + MisI || kscal!(m, one(FC) / β, Mv) # Continue A norm estimate ANorm² = ANorm² + α * α + oldβ * oldβ + β * β @@ -419,7 +419,7 @@ kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, : # Compute CG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ if solved_cg - @kaxpy!(m, ζbar, w̅, x) + kaxpy!(m, ζbar, w̅, x) end # Termination status @@ -433,7 +433,7 @@ kwargs_symmlq = (:M, :ldiv, :transfer_to_cg, :λ, :λest, :atol, :rtol, :etol, : overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/tricg.jl b/src/tricg.jl index a728cc2bf..66a9ebba0 100644 --- a/src/tricg.jl +++ b/src/tricg.jl @@ -190,54 +190,54 @@ kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax reset!(stats) # Initial solutions x₀ and y₀. - @kfill!(xₖ, zero(FC)) - @kfill!(yₖ, zero(FC)) + kfill!(xₖ, zero(FC)) + kfill!(yₖ, zero(FC)) iter = 0 itmax == 0 && (itmax = m+n) # Initialize preconditioned orthogonal tridiagonalization process. - @kfill!(M⁻¹vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(N⁻¹uₖ₋₁, zero(FC)) # u₀ = 0 + kfill!(M⁻¹vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(N⁻¹uₖ₋₁, zero(FC)) # u₀ = 0 # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] if warm_start mul!(b₀, A, Δy) - (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) - @kaxpby!(m, one(FC), b, -one(FC), b₀) + (τ ≠ 0) && kaxpy!(m, τ, Δx, b₀) + kaxpby!(m, one(FC), b, -one(FC), b₀) mul!(c₀, Aᴴ, Δx) - (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) - @kaxpby!(n, one(FC), c, -one(FC), c₀) + (ν ≠ 0) && kaxpy!(n, ν, Δy, c₀) + kaxpby!(n, one(FC), c, -one(FC), c₀) end # β₁Ev₁ = b ↔ β₁v₁ = Mb - @kcopy!(m, M⁻¹vₖ, b₀) # M⁻¹vₖ ← b₀ + kcopy!(m, M⁻¹vₖ, b₀) # M⁻¹vₖ ← b₀ MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E + βₖ = sqrt(kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(m, one(FC) / βₖ, vₖ) + kscal!(m, one(FC) / βₖ, M⁻¹vₖ) + MisI || kscal!(m, one(FC) / βₖ, vₖ) else error("b must be nonzero") end # γ₁Fu₁ = c ↔ γ₁u₁ = Nc - @kcopy!(n, N⁻¹uₖ, c₀) # M⁻¹uₖ ← c₀ + kcopy!(n, N⁻¹uₖ, c₀) # M⁻¹uₖ ← c₀ NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) - γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F + γₖ = sqrt(kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F if γₖ ≠ 0 - @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) - NisI || @kscal!(n, one(FC) / γₖ, uₖ) + kscal!(n, one(FC) / γₖ, N⁻¹uₖ) + NisI || kscal!(n, one(FC) / γₖ, uₖ) else error("c must be nonzero") end # Initialize directions Gₖ such that L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ - @kfill!(gx₂ₖ₋₁, zero(FC)) - @kfill!(gy₂ₖ₋₁, zero(FC)) - @kfill!(gx₂ₖ , zero(FC)) - @kfill!(gy₂ₖ , zero(FC)) + kfill!(gx₂ₖ₋₁, zero(FC)) + kfill!(gy₂ₖ₋₁, zero(FC)) + kfill!(gx₂ₖ , zero(FC)) + kfill!(gy₂ₖ , zero(FC)) # Compute ‖r₀‖² = (γ₁)² + (β₁)² rNorm = sqrt(γₖ^2 + βₖ^2) @@ -275,18 +275,18 @@ kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ if iter ≥ 2 - @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ - @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ + kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ end - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + αₖ = kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ - @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ + kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ - @kcopy!(m, M⁻¹vₖ₋₁, M⁻¹vₖ) # M⁻¹vₖ₋₁ ← M⁻¹vₖ - @kcopy!(n, N⁻¹uₖ₋₁, N⁻¹uₖ) # N⁻¹uₖ₋₁ ← N⁻¹uₖ + kcopy!(m, M⁻¹vₖ₋₁, M⁻¹vₖ) # M⁻¹vₖ₋₁ ← M⁻¹vₖ + kcopy!(n, N⁻¹uₖ₋₁, N⁻¹uₖ) # N⁻¹uₖ₋₁ ← N⁻¹uₖ # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] # [0 u₁ ••• 0 uₖ] @@ -348,9 +348,9 @@ kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax if iter == 1 # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ] - @kcopy!(m, gx₂ₖ₋₁, vₖ) # gx₂ₖ₋₁ ← vₖ + kcopy!(m, gx₂ₖ₋₁, vₖ) # gx₂ₖ₋₁ ← vₖ gx₂ₖ .= -conj(δₖ) .* gx₂ₖ₋₁ - @kcopy!(n, gy₂ₖ, uₖ) # gy₂ₖ ← uₖ + kcopy!(n, gy₂ₖ, uₖ) # gy₂ₖ ← uₖ else # [ 0 σ̄ₖ 1 0 ] [ gx₂ₖ₋₃ gy₂ₖ₋₃ ] = [ vₖ 0 ] # [ η̄ₖ λ̄ₖ δ̄ₖ 1 ] [ gx₂ₖ₋₂ gy₂ₖ₋₂ ] [ 0 uₖ ] @@ -366,40 +366,40 @@ kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax gy₂ₖ₋₁ .= uₖ .- gy₂ₖ₋₁ .- conj(δₖ) .* gy₂ₖ # g₂ₖ₋₃ == g₂ₖ and g₂ₖ₋₂ == g₂ₖ₋₁ - @kswap(gx₂ₖ₋₁, gx₂ₖ) - @kswap(gy₂ₖ₋₁, gy₂ₖ) + @kswap!(gx₂ₖ₋₁, gx₂ₖ) + @kswap!(gy₂ₖ₋₁, gy₂ₖ) end # Update xₖ = Gxₖ * pₖ - @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) - @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) + kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) + kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) # Update yₖ = Gyₖ * pₖ - @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) - @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) + kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) + kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) # Compute vₖ₊₁ and uₖ₊₁ MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E - γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F + βₖ₊₁ = sqrt(kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E + γₖ₊₁ = sqrt(kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F # βₖ₊₁ ≠ 0 if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, q) - MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + kscal!(m, one(FC) / βₖ₊₁, q) + MisI || kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) end # γₖ₊₁ ≠ 0 if γₖ₊₁ > btol - @kscal!(n, one(FC) / γₖ₊₁, p) - NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) + kscal!(n, one(FC) / γₖ₊₁, p) + NisI || kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) end # Update M⁻¹vₖ and N⁻¹uₖ - @kcopy!(m, M⁻¹vₖ, q) # M⁻¹vₖ ← q - @kcopy!(n, N⁻¹uₖ, p) # N⁻¹uₖ ← p + kcopy!(m, M⁻¹vₖ, q) # M⁻¹vₖ ← q + kcopy!(n, N⁻¹uₖ, p) # N⁻¹uₖ ← p # Compute ‖rₖ‖² = |γₖ₊₁ζ₂ₖ₋₁|² + |βₖ₊₁ζ₂ₖ|² ζ₂ₖ₋₁ = π₂ₖ₋₁ - conj(δₖ) * π₂ₖ @@ -440,8 +440,8 @@ kwargs_tricg = (:M, :N, :ldiv, :spd, :snd, :flip, :τ, :ν, :atol, :rtol, :itmax overtimed && (status = "time limit exceeded") # Update x and y - warm_start && @kaxpy!(m, one(FC), Δx, xₖ) - warm_start && @kaxpy!(n, one(FC), Δy, yₖ) + warm_start && kaxpy!(m, one(FC), Δx, xₖ) + warm_start && kaxpy!(n, one(FC), Δy, yₖ) solver.warm_start = false # Update stats diff --git a/src/trilqr.jl b/src/trilqr.jl index 7ae154f32..b609cd914 100644 --- a/src/trilqr.jl +++ b/src/trilqr.jl @@ -135,18 +135,18 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) mul!(s₀, Aᴴ, Δy) - @kaxpby!(n, one(FC), c, -one(FC), s₀) + kaxpby!(n, one(FC), c, -one(FC), s₀) end # Initial solution x₀ and residual r₀ = b - Ax₀. - @kfill!(x, zero(FC)) # x₀ - bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖ + kfill!(x, zero(FC)) # x₀ + bNorm = knorm(m, r₀) # rNorm = ‖r₀‖ # Initial solution y₀ and residual s₀ = c - Aᴴy₀. - @kfill!(t, zero(FC)) # t₀ - cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ + kfill!(t, zero(FC)) # t₀ + cNorm = knorm(n, s₀) # sNorm = ‖s₀‖ iter = 0 itmax == 0 && (itmax = m+n) @@ -160,10 +160,10 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %.2fs\n", iter, bNorm, cNorm, ktimer(start_time)) # Set up workspace. - βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖ - γₖ = @knrm2(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖ - @kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 + βₖ = knorm(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖ + γₖ = knorm(n, s₀) # γ₁ = ‖s₀‖ = ‖u₁‖ + kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ uₖ .= s₀ ./ γₖ # u₁ = (c - Aᴴy₀) / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ @@ -174,8 +174,8 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - @kfill!(wₖ₋₃, zero(FC)) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ - @kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ + kfill!(wₖ₋₃, zero(FC)) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ + kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ # Stopping criterion. inconsistent = false @@ -201,16 +201,16 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + αₖ = kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + βₖ₊₁ = knorm(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = knorm(n, p) # γₖ₊₁ = ‖p‖ # Update the LQ factorization of Tₖ = L̅ₖQₖ. # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] @@ -272,17 +272,17 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, if iter ≥ 2 # Compute solution xₖ. # (xᴸ)ₖ ← (xᴸ)ₖ₋₁ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) end # Compute d̅ₖ. if iter == 1 # d̅₁ = u₁ - @kcopy!(n, d̅, uₖ) # d̅ ← uₖ + kcopy!(n, d̅, uₖ) # d̅ ← uₖ else # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) end # Compute USYMLQ residual norm @@ -329,34 +329,34 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, # w₁ = v₁ / δ̄₁ if iter == 2 wₖ₋₁ = wₖ₋₂ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) wₖ₋₁ .= vₖ₋₁ ./ conj(δₖ₋₁) end # w₂ = (v₂ - λ̄₁w₁) / δ̄₂ if iter == 3 wₖ₋₁ = wₖ₋₃ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) wₖ₋₁ .= wₖ₋₁ ./ conj(δₖ₋₁) end # wₖ₋₁ = (vₖ₋₁ - λ̄ₖ₋₂wₖ₋₂ - ϵ̄ₖ₋₃wₖ₋₃) / δ̄ₖ₋₁ if iter ≥ 4 - @kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃) + kscal!(m, -conj(ϵₖ₋₃), wₖ₋₃) wₖ₋₁ = wₖ₋₃ - @kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) - @kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) + kaxpy!(m, one(FC), vₖ₋₁, wₖ₋₁) + kaxpy!(m, -conj(λₖ₋₂), wₖ₋₂, wₖ₋₁) wₖ₋₁ .= wₖ₋₁ ./ conj(δₖ₋₁) end if iter ≥ 3 # Swap pointers. - @kswap(wₖ₋₃, wₖ₋₂) + @kswap!(wₖ₋₃, wₖ₋₂) end if iter ≥ 2 # Compute solution tₖ₋₁. # tₖ₋₁ ← tₖ₋₂ + ψₖ₋₁ * wₖ₋₁ - @kaxpy!(m, ψₖ₋₁, wₖ₋₁, t) + kaxpy!(m, ψₖ₋₁, wₖ₋₁, t) end # Update ψbarₖ₋₁ @@ -378,8 +378,8 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, end # Compute uₖ₊₁ and uₖ₊₁. - @kcopy!(m, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ - @kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ + kcopy!(m, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ + kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ if βₖ₊₁ ≠ zero(T) vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @@ -415,7 +415,7 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, # Compute USYMCG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + kaxpy!(n, ζbarₖ, d̅, x) end # Termination status @@ -438,8 +438,8 @@ kwargs_trilqr = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, overtimed && (status = "time limit exceeded") # Update x and y - warm_start && @kaxpy!(n, one(FC), Δx, x) - warm_start && @kaxpy!(m, one(FC), Δy, t) + warm_start && kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(m, one(FC), Δy, t) solver.warm_start = false # Update stats diff --git a/src/trimr.jl b/src/trimr.jl index 8177c8fa2..5b0124e8b 100644 --- a/src/trimr.jl +++ b/src/trimr.jl @@ -196,58 +196,58 @@ kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, : reset!(stats) # Initial solutions x₀ and y₀. - @kfill!(xₖ, zero(FC)) - @kfill!(yₖ, zero(FC)) + kfill!(xₖ, zero(FC)) + kfill!(yₖ, zero(FC)) iter = 0 itmax == 0 && (itmax = m+n) # Initialize preconditioned orthogonal tridiagonalization process. - @kfill!(M⁻¹vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(N⁻¹uₖ₋₁, zero(FC)) # u₀ = 0 + kfill!(M⁻¹vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(N⁻¹uₖ₋₁, zero(FC)) # u₀ = 0 # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] if warm_start mul!(b₀, A, Δy) - (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) - @kaxpby!(m, one(FC), b, -one(FC), b₀) + (τ ≠ 0) && kaxpy!(m, τ, Δx, b₀) + kaxpby!(m, one(FC), b, -one(FC), b₀) mul!(c₀, Aᴴ, Δx) - (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) - @kaxpby!(n, one(FC), c, -one(FC), c₀) + (ν ≠ 0) && kaxpy!(n, ν, Δy, c₀) + kaxpby!(n, one(FC), c, -one(FC), c₀) end # β₁Ev₁ = b ↔ β₁v₁ = Mb - @kcopy!(m, M⁻¹vₖ, b₀) # M⁻¹vₖ ← b₀ + kcopy!(m, M⁻¹vₖ, b₀) # M⁻¹vₖ ← b₀ MisI || mulorldiv!(vₖ, M, M⁻¹vₖ, ldiv) - βₖ = sqrt(@kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E + βₖ = sqrt(kdotr(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E if βₖ ≠ 0 - @kscal!(m, one(FC) / βₖ, M⁻¹vₖ) - MisI || @kscal!(m, one(FC) / βₖ, vₖ) + kscal!(m, one(FC) / βₖ, M⁻¹vₖ) + MisI || kscal!(m, one(FC) / βₖ, vₖ) else error("b must be nonzero") end # γ₁Fu₁ = c ↔ γ₁u₁ = Nc - @kcopy!(n, N⁻¹uₖ, c₀) # N⁻¹uₖ ← c₀ + kcopy!(n, N⁻¹uₖ, c₀) # N⁻¹uₖ ← c₀ NisI || mulorldiv!(uₖ, N, N⁻¹uₖ, ldiv) - γₖ = sqrt(@kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F + γₖ = sqrt(kdotr(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F if γₖ ≠ 0 - @kscal!(n, one(FC) / γₖ, N⁻¹uₖ) - NisI || @kscal!(n, one(FC) / γₖ, uₖ) + kscal!(n, one(FC) / γₖ, N⁻¹uₖ) + NisI || kscal!(n, one(FC) / γₖ, uₖ) else error("c must be nonzero") end # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ. - @kfill!(gx₂ₖ₋₃, zero(FC)) - @kfill!(gy₂ₖ₋₃, zero(FC)) - @kfill!(gx₂ₖ₋₂, zero(FC)) - @kfill!(gy₂ₖ₋₂, zero(FC)) - @kfill!(gx₂ₖ₋₁, zero(FC)) - @kfill!(gy₂ₖ₋₁, zero(FC)) - @kfill!(gx₂ₖ , zero(FC)) - @kfill!(gy₂ₖ , zero(FC)) + kfill!(gx₂ₖ₋₃, zero(FC)) + kfill!(gy₂ₖ₋₃, zero(FC)) + kfill!(gx₂ₖ₋₂, zero(FC)) + kfill!(gy₂ₖ₋₂, zero(FC)) + kfill!(gx₂ₖ₋₁, zero(FC)) + kfill!(gy₂ₖ₋₁, zero(FC)) + kfill!(gx₂ₖ , zero(FC)) + kfill!(gy₂ₖ , zero(FC)) # Compute ‖r₀‖² = (γ₁)² + (β₁)² rNorm = sqrt(γₖ^2 + βₖ^2) @@ -289,32 +289,32 @@ kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, : mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ if iter ≥ 2 - @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ - @kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ + kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ + kaxpy!(n, -βₖ, N⁻¹uₖ₋₁, p) # p ← p - βₖ * N⁻¹uₖ₋₁ end - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + αₖ = kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - @kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ - @kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ + kaxpy!(m, - αₖ , M⁻¹vₖ, q) # q ← q - αₖ * M⁻¹vₖ + kaxpy!(n, -conj(αₖ), N⁻¹uₖ, p) # p ← p - ᾱₖ * N⁻¹uₖ # Compute vₖ₊₁ and uₖ₊₁ MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ - βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E - γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F + βₖ₊₁ = sqrt(kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E + γₖ₊₁ = sqrt(kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F # βₖ₊₁ ≠ 0 if βₖ₊₁ > btol - @kscal!(m, one(FC) / βₖ₊₁, q) - MisI || @kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) + kscal!(m, one(FC) / βₖ₊₁, q) + MisI || kscal!(m, one(FC) / βₖ₊₁, vₖ₊₁) end # γₖ₊₁ ≠ 0 if γₖ₊₁ > btol - @kscal!(n, one(FC) / γₖ₊₁, p) - NisI || @kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) + kscal!(n, one(FC) / γₖ₊₁, p) + NisI || kscal!(n, one(FC) / γₖ₊₁, uₖ₊₁) end # Notations : Wₖ = [w₁ ••• wₖ] = [v₁ 0 ••• vₖ 0 ] @@ -434,9 +434,9 @@ kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, : # [ λ₁ η₂ σ₃ δ₄ ] [ gx₂ gy₂ ] [ 0 u₂ ] # [ gx₃ gy₃ ] # [ gx₄ gy₄ ] - @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) - @kswap(gx₂ₖ₋₂, gx₂ₖ) - @kswap(gy₂ₖ₋₂, gy₂ₖ) + @kswap!(gx₂ₖ₋₃, gx₂ₖ₋₁) + @kswap!(gx₂ₖ₋₂, gx₂ₖ) + @kswap!(gy₂ₖ₋₂, gy₂ₖ) gx₂ₖ₋₁ .= (vₖ .- η₂ₖ₋₃ .* gx₂ₖ₋₃ .- σ₂ₖ₋₂ .* gx₂ₖ₋₂ ) ./ δ₂ₖ₋₁ gx₂ₖ .= ( .- λ₂ₖ₋₃ .* gx₂ₖ₋₃ .- η₂ₖ₋₂ .* gx₂ₖ₋₂ .- σ₂ₖ₋₁ .* gx₂ₖ₋₁) ./ δ₂ₖ gy₂ₖ₋₁ .= ( .- η₂ₖ₋₃ .* gy₂ₖ₋₃ .- σ₂ₖ₋₂ .* gy₂ₖ₋₂ ) ./ δ₂ₖ₋₁ @@ -447,15 +447,15 @@ kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, : g₂ₖ₋₁ = g₂ₖ₋₅ = gx₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gx₂ₖ₋₂; g₂ₖ₋₃ = gx₂ₖ₋₁; g₂ₖ₋₂ = gx₂ₖ g₂ₖ₋₁ .= (vₖ .- μ₂ₖ₋₅ .* g₂ₖ₋₅ .- λ₂ₖ₋₄ .* g₂ₖ₋₄ .- η₂ₖ₋₃ .* g₂ₖ₋₃ .- σ₂ₖ₋₂ .* g₂ₖ₋₂ ) ./ δ₂ₖ₋₁ g₂ₖ .= ( .- μ₂ₖ₋₄ .* g₂ₖ₋₄ .- λ₂ₖ₋₃ .* g₂ₖ₋₃ .- η₂ₖ₋₂ .* g₂ₖ₋₂ .- σ₂ₖ₋₁ .* g₂ₖ₋₁) ./ δ₂ₖ - @kswap(gx₂ₖ₋₃, gx₂ₖ₋₁) - @kswap(gx₂ₖ₋₂, gx₂ₖ) + @kswap!(gx₂ₖ₋₃, gx₂ₖ₋₁) + @kswap!(gx₂ₖ₋₂, gx₂ₖ) # μ₂ₖ₋₅ * gy₂ₖ₋₅ + λ₂ₖ₋₄ * gy₂ₖ₋₄ + η₂ₖ₋₃ * gy₂ₖ₋₃ + σ₂ₖ₋₂ * gy₂ₖ₋₂ + δ₂ₖ₋₁ * gy₂ₖ₋₁ = 0 # μ₂ₖ₋₄ * gy₂ₖ₋₄ + λ₂ₖ₋₃ * gy₂ₖ₋₃ + η₂ₖ₋₂ * gy₂ₖ₋₂ + σ₂ₖ₋₁ * gy₂ₖ₋₁ + δ₂ₖ * gy₂ₖ = uₖ g₂ₖ₋₁ = g₂ₖ₋₅ = gy₂ₖ₋₃; g₂ₖ = g₂ₖ₋₄ = gy₂ₖ₋₂; g₂ₖ₋₃ = gy₂ₖ₋₁; g₂ₖ₋₂ = gy₂ₖ g₂ₖ₋₁ .= ( .- μ₂ₖ₋₅ .* g₂ₖ₋₅ .- λ₂ₖ₋₄ .* g₂ₖ₋₄ .- η₂ₖ₋₃ .* g₂ₖ₋₃ .- σ₂ₖ₋₂ .* g₂ₖ₋₂ ) ./ δ₂ₖ₋₁ g₂ₖ .= (uₖ .- μ₂ₖ₋₄ .* g₂ₖ₋₄ .- λ₂ₖ₋₃ .* g₂ₖ₋₃ .- η₂ₖ₋₂ .* g₂ₖ₋₂ .- σ₂ₖ₋₁ .* g₂ₖ₋₁) ./ δ₂ₖ - @kswap(gy₂ₖ₋₃, gy₂ₖ₋₁) - @kswap(gy₂ₖ₋₂, gy₂ₖ) + @kswap!(gy₂ₖ₋₃, gy₂ₖ₋₁) + @kswap!(gy₂ₖ₋₂, gy₂ₖ) end # Update p̅ₖ = (Qₖ)ᴴ * (β₁e₁ + γ₁e₂) @@ -472,28 +472,28 @@ kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, : πbar₂ₖ₊₁ = conj(s₄ₖ) * πtmp₂ₖ # Update xₖ = Gxₖ * pₖ - @kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) - @kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) + kaxpy!(m, π₂ₖ₋₁, gx₂ₖ₋₁, xₖ) + kaxpy!(m, π₂ₖ , gx₂ₖ , xₖ) # Update yₖ = Gyₖ * pₖ - @kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) - @kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) + kaxpy!(n, π₂ₖ₋₁, gy₂ₖ₋₁, yₖ) + kaxpy!(n, π₂ₖ , gy₂ₖ , yₖ) # Compute ‖rₖ‖² = |πbar₂ₖ₊₁|² + |πbar₂ₖ₊₂|² rNorm = sqrt(abs2(πbar₂ₖ₊₁) + abs2(πbar₂ₖ₊₂)) history && push!(rNorms, rNorm) # Update vₖ and uₖ - MisI || @kcopy!(m, vₖ, vₖ₊₁) # vₖ ← vₖ₊₁ - NisI || @kcopy!(n, uₖ, uₖ₊₁) # uₖ ← uₖ₊₁ + MisI || kcopy!(m, vₖ, vₖ₊₁) # vₖ ← vₖ₊₁ + NisI || kcopy!(n, uₖ, uₖ₊₁) # uₖ ← uₖ₊₁ # Update M⁻¹vₖ₋₁ and N⁻¹uₖ₋₁ - @kcopy!(m, M⁻¹vₖ₋₁, M⁻¹vₖ) # M⁻¹vₖ₋₁ ← M⁻¹vₖ - @kcopy!(n, N⁻¹uₖ₋₁, N⁻¹uₖ) # N⁻¹uₖ₋₁ ← N⁻¹uₖ + kcopy!(m, M⁻¹vₖ₋₁, M⁻¹vₖ) # M⁻¹vₖ₋₁ ← M⁻¹vₖ + kcopy!(n, N⁻¹uₖ₋₁, N⁻¹uₖ) # N⁻¹uₖ₋₁ ← N⁻¹uₖ # Update M⁻¹vₖ and N⁻¹uₖ - @kcopy!(m, M⁻¹vₖ, q) # M⁻¹vₖ ← q - @kcopy!(n, N⁻¹uₖ, p) # N⁻¹uₖ ← p + kcopy!(m, M⁻¹vₖ, q) # M⁻¹vₖ ← q + kcopy!(n, N⁻¹uₖ, p) # N⁻¹uₖ ← p # Update cosines and sines old_s₁ₖ = s₁ₖ @@ -543,8 +543,8 @@ kwargs_trimr = (:M, :N, :ldiv, :spd, :snd, :flip, :sp, :τ, :ν, :atol, :rtol, : overtimed && (status = "time limit exceeded") # Update x and y - warm_start && @kaxpy!(m, one(FC), Δx, xₖ) - warm_start && @kaxpy!(n, one(FC), Δy, yₖ) + warm_start && kaxpy!(m, one(FC), Δx, xₖ) + warm_start && kaxpy!(n, one(FC), Δy, yₖ) solver.warm_start = false # Update stats diff --git a/src/usymlq.jl b/src/usymlq.jl index ab4c24939..db7569e4a 100644 --- a/src/usymlq.jl +++ b/src/usymlq.jl @@ -142,12 +142,12 @@ kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) end # Initial solution x₀ and residual norm ‖r₀‖. kfill!(x, zero(FC)) - bNorm = @knrm2(m, r₀) + bNorm = knorm(m, r₀) history && push!(rNorms, bNorm) if bNorm == 0 stats.niter = 0 @@ -166,15 +166,15 @@ kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, (verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer") kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, bNorm, ktimer(start_time)) - βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ - γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ - @kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 + βₖ = knorm(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ + γₖ = knorm(n, c) # γ₁ = ‖u₁‖ = ‖c‖ + kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - @kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ + kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations @@ -198,16 +198,16 @@ kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + αₖ = kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + βₖ₊₁ = knorm(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = knorm(n, p) # γₖ₊₁ = ‖p‖ # Update the LQ factorization of Tₖ = L̅ₖQₖ. # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] @@ -268,22 +268,22 @@ kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, if iter ≥ 2 # Compute solution xₖ. # (xᴸ)ₖ₋₁ ← (xᴸ)ₖ₋₂ + ζₖ₋₁ * dₖ₋₁ - @kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) - @kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) + kaxpy!(n, ζₖ₋₁ * cₖ, d̅, x) + kaxpy!(n, ζₖ₋₁ * sₖ, uₖ, x) end # Compute d̅ₖ. if iter == 1 # d̅₁ = u₁ - @kcopy!(n, d̅, uₖ) # d̅ ← vₖ + kcopy!(n, d̅, uₖ) # d̅ ← vₖ else # d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ - @kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) + kaxpby!(n, -cₖ, uₖ, conj(sₖ), d̅) end # Compute uₖ₊₁ and uₖ₊₁. - @kcopy!(m, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ - @kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ + kcopy!(m, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ + kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ if βₖ₊₁ ≠ zero(T) vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @@ -332,7 +332,7 @@ kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, # Compute USYMCG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ if solved_cg - @kaxpy!(n, ζbarₖ, d̅, x) + kaxpy!(n, ζbarₖ, d̅, x) end # Termination status @@ -343,7 +343,7 @@ kwargs_usymlq = (:transfer_to_usymcg, :atol, :rtol, :itmax, :timemax, :verbose, overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/src/usymqr.jl b/src/usymqr.jl index 8ad92134f..812f30c81 100644 --- a/src/usymqr.jl +++ b/src/usymqr.jl @@ -145,12 +145,12 @@ kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, if warm_start mul!(r₀, A, Δx) - @kaxpby!(n, one(FC), b, -one(FC), r₀) + kaxpby!(n, one(FC), b, -one(FC), r₀) end # Initial solution x₀ and residual norm ‖r₀‖. - @kfill!(x, zero(FC)) - rNorm = @knrm2(m, r₀) + kfill!(x, zero(FC)) + rNorm = knorm(m, r₀) history && push!(rNorms, rNorm) if rNorm == 0 stats.niter = 0 @@ -170,16 +170,16 @@ kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, (verbose > 0) && @printf(iostream, "%5s %7s %8s %5s\n", "k", "‖rₖ‖", "‖Aᴴrₖ₋₁‖", "timer") kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8s %.2fs\n", iter, rNorm, " ✗ ✗ ✗ ✗", ktimer(start_time)) - βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ - γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ - @kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 - @kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 + βₖ = knorm(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ + γₖ = knorm(n, c) # γ₁ = ‖u₁‖ = ‖c‖ + kfill!(vₖ₋₁, zero(FC)) # v₀ = 0 + kfill!(uₖ₋₁, zero(FC)) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₂ = cₖ₋₁ = cₖ = one(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - @kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ - @kfill!(wₖ₋₁, zero(FC)) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ + kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ + kfill!(wₖ₋₁, zero(FC)) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ # Stopping criterion. @@ -201,16 +201,16 @@ kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ - @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ + kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ - αₖ = @kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ + αₖ = kdot(m, vₖ, q) # αₖ = ⟨vₖ,q⟩ - @kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ + kaxpy!(m, - αₖ , vₖ, q) # q ← q - αₖ * vₖ + kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - βₖ₊₁ = @knrm2(m, q) # βₖ₊₁ = ‖q‖ - γₖ₊₁ = @knrm2(n, p) # γₖ₊₁ = ‖p‖ + βₖ₊₁ = knorm(m, q) # βₖ₊₁ = ‖q‖ + γₖ₊₁ = knorm(n, p) # γₖ₊₁ = ‖p‖ # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. # [ Oᵀ ] @@ -262,28 +262,28 @@ kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, # w₁ = u₁ / δ₁ if iter == 1 wₖ = wₖ₋₁ - @kaxpy!(n, one(FC), uₖ, wₖ) + kaxpy!(n, one(FC), uₖ, wₖ) wₖ .= wₖ ./ δₖ end # w₂ = (u₂ - λ₁w₁) / δ₂ if iter == 2 wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), uₖ, wₖ) + kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + kaxpy!(n, one(FC), uₖ, wₖ) wₖ .= wₖ ./ δₖ end # wₖ = (uₖ - λₖ₋₁wₖ₋₁ - ϵₖ₋₂wₖ₋₂) / δₖ if iter ≥ 3 - @kscal!(n, -ϵₖ₋₂, wₖ₋₂) + kscal!(n, -ϵₖ₋₂, wₖ₋₂) wₖ = wₖ₋₂ - @kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) - @kaxpy!(n, one(FC), uₖ, wₖ) + kaxpy!(n, -λₖ₋₁, wₖ₋₁, wₖ) + kaxpy!(n, one(FC), uₖ, wₖ) wₖ .= wₖ ./ δₖ end # Compute solution xₖ. # xₖ ← xₖ₋₁ + ζₖ * wₖ - @kaxpy!(n, ζₖ, wₖ, x) + kaxpy!(n, ζₖ, wₖ, x) # Compute ‖rₖ‖ = |ζbarₖ₊₁|. rNorm = abs(ζbarₖ₊₁) @@ -294,19 +294,19 @@ kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, history && push!(AᴴrNorms, AᴴrNorm) # Compute uₖ₊₁ and uₖ₊₁. - @kcopy!(m, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ - @kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ + kcopy!(m, vₖ₋₁, vₖ) # vₖ₋₁ ← vₖ + kcopy!(n, uₖ₋₁, uₖ) # uₖ₋₁ ← uₖ if βₖ₊₁ ≠ zero(T) - vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q + vₖ .= q ./ βₖ₊₁ # βₖ₊₁vₖ₊₁ = q end if γₖ₊₁ ≠ zero(T) - uₖ .= p ./ γₖ₊₁ # γₖ₊₁uₖ₊₁ = p + uₖ .= p ./ γₖ₊₁ # γₖ₊₁uₖ₊₁ = p end # Update directions for x. if iter ≥ 2 - @kswap(wₖ₋₂, wₖ₋₁) + @kswap!(wₖ₋₂, wₖ₋₁) end # Update sₖ₋₂, cₖ₋₂, sₖ₋₁, cₖ₋₁, ζbarₖ, γₖ, βₖ. @@ -339,7 +339,7 @@ kwargs_usymqr = (:atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, overtimed && (status = "time limit exceeded") # Update x - warm_start && @kaxpy!(n, one(FC), Δx, x) + warm_start && kaxpy!(n, one(FC), Δx, x) solver.warm_start = false # Update stats diff --git a/test/callback_utils.jl b/test/callback_utils.jl index 8cfdb42d5..9996fe929 100644 --- a/test/callback_utils.jl +++ b/test/callback_utils.jl @@ -35,7 +35,7 @@ function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A, # Form xₖ = N⁻¹Vₖyₖ for i = 1 : solver.inner_iter - Krylov.@kaxpy!(n, y2[i], V[i], x2) + Krylov.kaxpy!(n, y2[i], V[i], x2) end if !NisI p2 .= solver.p diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl index 0c7dad24e..3b595f289 100644 --- a/test/gpu/amd.jl +++ b/test/gpu/amd.jl @@ -52,39 +52,39 @@ include("gpu.jl") c = rand(T) @testset "kdot -- $FC" begin - Krylov.@kdot(n, x, y) + Krylov.kdot(n, x, y) end @testset "kdotr -- $FC" begin - Krylov.@kdotr(n, x, y) + Krylov.kdotr(n, x, y) end - @testset "knrm2 -- $FC" begin - Krylov.@knrm2(n, x) + @testset "knorm -- $FC" begin + Krylov.knorm(n, x) end @testset "kaxpy! -- $FC" begin - Krylov.@kaxpy!(n, a, x, y) - Krylov.@kaxpy!(n, a2, x, y) + Krylov.kaxpy!(n, a, x, y) + Krylov.kaxpy!(n, a2, x, y) end @testset "kaxpby! -- $FC" begin - Krylov.@kaxpby!(n, a, x, b, y) - Krylov.@kaxpby!(n, a2, x, b, y) - Krylov.@kaxpby!(n, a, x, b2, y) - Krylov.@kaxpby!(n, a2, x, b2, y) + Krylov.kaxpby!(n, a, x, b, y) + Krylov.kaxpby!(n, a2, x, b, y) + Krylov.kaxpby!(n, a, x, b2, y) + Krylov.kaxpby!(n, a2, x, b2, y) end @testset "kcopy! -- $FC" begin - Krylov.@kcopy!(n, y, x) + Krylov.kcopy!(n, y, x) end @testset "kswap -- $FC" begin - Krylov.@kswap(x, y) + Krylov.@kswap!(x, y) end @testset "kref! -- $FC" begin - Krylov.@kref!(n, x, y, c, s) + Krylov.kref!(n, x, y, c, s) end @testset "conversion -- $FC" begin diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl index 3f65774b5..c3ee22e4f 100644 --- a/test/gpu/intel.jl +++ b/test/gpu/intel.jl @@ -35,39 +35,39 @@ include("gpu.jl") c = rand(T) @testset "kdot -- $FC" begin - Krylov.@kdot(n, x, y) + Krylov.kdot(n, x, y) end @testset "kdotr -- $FC" begin - Krylov.@kdotr(n, x, y) + Krylov.kdotr(n, x, y) end - @testset "knrm2 -- $FC" begin - Krylov.@knrm2(n, x) + @testset "knorm -- $FC" begin + Krylov.knorm(n, x) end @testset "kaxpy! -- $FC" begin - Krylov.@kaxpy!(n, a, x, y) - Krylov.@kaxpy!(n, a2, x, y) + Krylov.kaxpy!(n, a, x, y) + Krylov.kaxpy!(n, a2, x, y) end @testset "kaxpby! -- $FC" begin - Krylov.@kaxpby!(n, a, x, b, y) - Krylov.@kaxpby!(n, a2, x, b, y) - Krylov.@kaxpby!(n, a, x, b2, y) - Krylov.@kaxpby!(n, a2, x, b2, y) + Krylov.kaxpby!(n, a, x, b, y) + Krylov.kaxpby!(n, a2, x, b, y) + Krylov.kaxpby!(n, a, x, b2, y) + Krylov.kaxpby!(n, a2, x, b2, y) end @testset "kcopy! -- $FC" begin - Krylov.@kcopy!(n, y, x) + Krylov.kcopy!(n, y, x) end @testset "kswap -- $FC" begin - Krylov.@kswap(x, y) + Krylov.@kswap!(x, y) end @testset "kref! -- $FC" begin - Krylov.@kref!(n, x, y, c, s) + Krylov.kref!(n, x, y, c, s) end @testset "conversion -- $FC" begin diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl index 42be0023d..5c90845c4 100644 --- a/test/gpu/metal.jl +++ b/test/gpu/metal.jl @@ -35,39 +35,39 @@ include("gpu.jl") c = rand(T) @testset "kdot -- $FC" begin - Krylov.@kdot(n, x, y) + Krylov.kdot(n, x, y) end @testset "kdotr -- $FC" begin - Krylov.@kdotr(n, x, y) + Krylov.kdotr(n, x, y) end - @testset "knrm2 -- $FC" begin - Krylov.@knrm2(n, x) + @testset "knorm -- $FC" begin + Krylov.knorm(n, x) end @testset "kaxpy! -- $FC" begin - Krylov.@kaxpy!(n, a, x, y) - Krylov.@kaxpy!(n, a2, x, y) + Krylov.kaxpy!(n, a, x, y) + Krylov.kaxpy!(n, a2, x, y) end @testset "kaxpby! -- $FC" begin - Krylov.@kaxpby!(n, a, x, b, y) - Krylov.@kaxpby!(n, a2, x, b, y) - Krylov.@kaxpby!(n, a, x, b2, y) - Krylov.@kaxpby!(n, a2, x, b2, y) + Krylov.kaxpby!(n, a, x, b, y) + Krylov.kaxpby!(n, a2, x, b, y) + Krylov.kaxpby!(n, a, x, b2, y) + Krylov.kaxpby!(n, a2, x, b2, y) end @testset "kcopy! -- $FC" begin - Krylov.@kcopy!(n, y, x) + Krylov.kcopy!(n, y, x) end @testset "kswap -- $FC" begin - Krylov.@kswap(x, y) + Krylov.@kswap!(x, y) end @testset "kref! -- $FC" begin - Krylov.@kref!(n, x, y, c, s) + Krylov.kref!(n, x, y, c, s) end @testset "conversion -- $FC" begin diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl index c691e8a4e..980234387 100644 --- a/test/gpu/nvidia.jl +++ b/test/gpu/nvidia.jl @@ -135,39 +135,39 @@ include("gpu.jl") c = rand(T) @testset "kdot -- $FC" begin - Krylov.@kdot(n, x, y) + Krylov.kdot(n, x, y) end @testset "kdotr -- $FC" begin - Krylov.@kdotr(n, x, y) + Krylov.kdotr(n, x, y) end - @testset "knrm2 -- $FC" begin - Krylov.@knrm2(n, x) + @testset "knorm -- $FC" begin + Krylov.knorm(n, x) end @testset "kaxpy! -- $FC" begin - Krylov.@kaxpy!(n, a, x, y) - Krylov.@kaxpy!(n, a2, x, y) + Krylov.kaxpy!(n, a, x, y) + Krylov.kaxpy!(n, a2, x, y) end @testset "kaxpby! -- $FC" begin - Krylov.@kaxpby!(n, a, x, b, y) - Krylov.@kaxpby!(n, a2, x, b, y) - Krylov.@kaxpby!(n, a, x, b2, y) - Krylov.@kaxpby!(n, a2, x, b2, y) + Krylov.kaxpby!(n, a, x, b, y) + Krylov.kaxpby!(n, a2, x, b, y) + Krylov.kaxpby!(n, a, x, b2, y) + Krylov.kaxpby!(n, a2, x, b2, y) end @testset "kcopy! -- $FC" begin - Krylov.@kcopy!(n, y, x) + Krylov.kcopy!(n, y, x) end @testset "kswap -- $FC" begin - Krylov.@kswap(x, y) + Krylov.@kswap!(x, y) end @testset "kref! -- $FC" begin - Krylov.@kref!(n, x, y, c, s) + Krylov.kref!(n, x, y, c, s) end @testset "conversion -- $FC" begin diff --git a/test/test_aux.jl b/test/test_aux.jl index 5e3d5cf17..114461bf1 100644 --- a/test/test_aux.jl +++ b/test/test_aux.jl @@ -180,27 +180,27 @@ a2 = rand(T) b2 = rand(T) - Krylov.@kdot(n, x, y) + Krylov.kdot(n, x, y) - Krylov.@kdotr(n, x, y) + Krylov.kdotr(n, x, y) - Krylov.@knrm2(n, x) + Krylov.knorm(n, x) - Krylov.@kaxpy!(n, a, x, y) - Krylov.@kaxpy!(n, a2, x, y) + Krylov.kaxpy!(n, a, x, y) + Krylov.kaxpy!(n, a2, x, y) - Krylov.@kaxpby!(n, a, x, b, y) - Krylov.@kaxpby!(n, a2, x, b, y) - Krylov.@kaxpby!(n, a, x, b2, y) - Krylov.@kaxpby!(n, a2, x, b2, y) + Krylov.kaxpby!(n, a, x, b, y) + Krylov.kaxpby!(n, a2, x, b, y) + Krylov.kaxpby!(n, a, x, b2, y) + Krylov.kaxpby!(n, a2, x, b2, y) - Krylov.@kcopy!(n, y, x) + Krylov.kcopy!(n, y, x) - Krylov.@kfill!(x, a) + Krylov.kfill!(x, a) - Krylov.@kswap(x, y) + Krylov.@kswap!(x, y) - Krylov.@kref!(n, x, y, c, s) + Krylov.kref!(n, x, y, c, s) end end end