-
Notifications
You must be signed in to change notification settings - Fork 41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make covariance and correlation work for iterators, skipmissing in particular. #34
base: master
Are you sure you want to change the base?
Changes from 6 commits
1cdf046
f3e9641
2f9c4f8
52c18ea
4620247
b86ddba
0221557
e3bc3cc
3493ed2
b940ae1
8b49745
2b28908
e42c0b0
cb3020c
36734bf
2f1c404
4279703
b9f8f96
14c5701
11bd8f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -495,6 +495,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) = | |||||||
|
||||||||
# covzm (with centered data) | ||||||||
|
||||||||
nalimilan marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
covzm(itr::Any; corrected::Bool = true) = covzm(collect(itr); corrected = corrected) | ||||||||
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected)) | ||||||||
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) | ||||||||
C = unscaled_covzm(x, vardim) | ||||||||
|
@@ -504,6 +505,8 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) | |||||||
A .= A .* b | ||||||||
return A | ||||||||
end | ||||||||
covzm(x::Any, y::Any; corrected::Bool = true) = | ||||||||
covzm(collect(x), collect(y); corrected = corrected) | ||||||||
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = | ||||||||
unscaled_covzm(x, y) / (length(x) - Int(corrected)) | ||||||||
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true) | ||||||||
|
@@ -518,22 +521,32 @@ end | |||||||
# covm (with provided mean) | ||||||||
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector} | ||||||||
## which can't be handled by broadcast | ||||||||
covm(itr::Any, itrmean; corrected::Bool=true) = | ||||||||
covm(collect(itr), itrmean; corrected=corrected) | ||||||||
covm(x::AbstractVector, xmean; corrected::Bool=true) = | ||||||||
covzm(map(t -> t - xmean, x); corrected=corrected) | ||||||||
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = | ||||||||
covzm(x .- xmean, vardim; corrected=corrected) | ||||||||
covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) = | ||||||||
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) | ||||||||
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) = | ||||||||
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) | ||||||||
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) = | ||||||||
covzm(x .- xmean, y .- ymean, vardim; corrected=corrected) | ||||||||
|
||||||||
# cov (API) | ||||||||
""" | ||||||||
cov(x::AbstractVector; corrected::Bool=true) | ||||||||
cov(itr::Any; corrected::Bool=true) | ||||||||
pdeffebach marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
|
||||||||
Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum | ||||||||
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`. | ||||||||
Compute the variance of the iterator `itr`. If `corrected` is `true` (the default) then the sum | ||||||||
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where | ||||||||
``n`` is the number of elements. | ||||||||
""" | ||||||||
function cov(itr::Any; corrected::Bool=true)
    # NOTE(review): a review comment on this method asks whether 0-dimensional or
    # more-than-2-dimensional arrays should be accepted here — still to be decided.
    # Materialize the iterator once so the mean and the centering see the same values.
    x = collect(itr)
    meanx = mean(x)
    # Center with allocating `map` rather than `map!(f, x, x)`: storing `t - meanx`
    # back into `x` throws an `InexactError` whenever the mean is not representable
    # in `eltype(x)` (for example integer data with a fractional mean).
    return covzm(map(t -> t - meanx, x); corrected=corrected)
end
cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected) | ||||||||
|
||||||||
""" | ||||||||
|
@@ -546,14 +559,24 @@ if `corrected` is `false` where `n = size(X, dims)`. | |||||||
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) = | ||||||||
covm(X, _vmean(X, dims), dims; corrected=corrected) | ||||||||
|
||||||||
|
||||||||
pdeffebach marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
""" | ||||||||
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) | ||||||||
cov(x::Any, y::Any; corrected::Bool=true) | ||||||||
|
||||||||
Compute the covariance between the vectors `x` and `y`. If `corrected` is `true` (the | ||||||||
Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the | ||||||||
default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where | ||||||||
``*`` denotes the complex conjugate and `n = length(x) = length(y)`. If `corrected` is | ||||||||
``*`` denotes the complex conjugate and ``n`` the number of elements. If `corrected` is | ||||||||
`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. | ||||||||
""" | ||||||||
function cov(x::Any, y::Any; corrected::Bool=true)
    # Materialize both iterators so each mean and each centering pass sees the same values.
    cx = collect(x)
    cy = collect(y)
    meanx = mean(cx)
    meany = mean(cy)
    # Center with allocating `map` rather than `map!(f, cx, cx)`: storing `t - mean`
    # back into the collected array throws an `InexactError` whenever the mean is not
    # representable in the array's element type (for example integer data with a
    # fractional mean).
    dx = map(t -> t - meanx, cx)
    dy = map(t -> t - meany, cy)
    return covzm(dx, dy; corrected=corrected)
end
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = | ||||||||
covm(x, mean(x), y, mean(y); corrected=corrected) | ||||||||
|
||||||||
|
@@ -630,7 +653,13 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray) | |||||||
end | ||||||||
|
||||||||
# corzm (non-exported, with centered data) | ||||||||
|
||||||||
function corzm(itr::Any)
    # NOTE(review): a reviewer asked for this element-type logic, which is repeated in
    # other methods of this file, to be moved into one shared internal method.
    #
    # Returns the multiplicative identity of the real type underlying the elements.
    # When the iterator advertises a concrete element type, that type is used directly;
    # otherwise the iterator is collected and the element type of the result is used.
    # `isconcretetype` is the Base predicate for this check (`isconcrete` is not defined).
    if Base.IteratorEltype(itr) isa Base.HasEltype && isconcretetype(eltype(itr))
        return one(real(eltype(itr)))
    else
        return one(real(eltype(collect(itr))))
    end
end
corzm(x::AbstractVector{T}) where {T} = one(real(T)) | ||||||||
function corzm(x::AbstractMatrix, vardim::Int=1) | ||||||||
c = unscaled_covzm(x, vardim) | ||||||||
|
@@ -644,9 +673,16 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) = | |||||||
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim))) | ||||||||
|
||||||||
# corm | ||||||||
|
||||||||
function corm(itr::Any, itrmean)
    # Returns the multiplicative identity of the real type underlying the elements;
    # `itrmean` is not read by this method.
    # When the iterator advertises a concrete element type, that type is used directly;
    # otherwise the iterator is collected and the element type of the result is used.
    # `isconcretetype` is the Base predicate for this check (`isconcrete` is not defined).
    if Base.IteratorEltype(itr) isa Base.HasEltype && isconcretetype(eltype(itr))
        return one(real(eltype(itr)))
    else
        return one(real(eltype(collect(itr))))
    end
end
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) | ||||||||
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) | ||||||||
corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my) | ||||||||
function corm(x::AbstractVector, mx, y::AbstractVector, my) | ||||||||
require_one_based_indexing(x, y) | ||||||||
n = length(x) | ||||||||
|
@@ -675,10 +711,17 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) = | |||||||
|
||||||||
# cor | ||||||||
""" | ||||||||
cor(x::AbstractVector) | ||||||||
cor(itr::Any) | ||||||||
|
||||||||
Return the number one. | ||||||||
""" | ||||||||
function cor(itr::Any)
    # When the iterator advertises a concrete element type, that type is used directly;
    # otherwise the iterator is collected and the element type of the result is used.
    # `isconcretetype` is the Base predicate for this check (`isconcrete` is not defined).
    if Base.IteratorEltype(itr) isa Base.HasEltype && isconcretetype(eltype(itr))
        return one(real(eltype(itr)))
    else
        return one(real(eltype(collect(itr))))
    end
end
cor(x::AbstractVector) = one(real(eltype(x))) | ||||||||
|
||||||||
""" | ||||||||
|
@@ -688,6 +731,18 @@ Compute the Pearson correlation matrix of the matrix `X` along the dimension `di | |||||||
""" | ||||||||
cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims) | ||||||||
|
||||||||
""" | ||||||||
cor(x::Any, y::Any) | ||||||||
|
||||||||
Compute the Pearson correlation between the iterators `x` and `y`. | ||||||||
pdeffebach marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
""" | ||||||||
function cor(x::Any, y::Any) | ||||||||
cx = collect(x) | ||||||||
cy = collect(y) | ||||||||
|
||||||||
corm(cx, mean(cx), cy, mean(cy)) | ||||||||
end | ||||||||
|
||||||||
""" | ||||||||
cor(x::AbstractVector, y::AbstractVector) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove this docstring, which is a special case of the previous one. |
||||||||
|
||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Intentional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. It's a hack to make sure Julia knows to load this folder; it's described here for Pkg.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Normally the Travis script does that automatically, so you can revert this: https://github.com/JuliaLang/Statistics.jl/blob/master/.travis.yml#L24
Though you need it to run tests locally.