Code
using DrWatson
@quickactivate
include(srcdir("main.jl"));
References: - Associations.jl
Notes:
ChatterjeeCorrelation
, association(x, y) == association(x, log(y))
┌ Warning: automatically converting Arrow.Timestamp with precision = MICROSECOND to `DateTime` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)
└ @ Arrow /Users/zijin/.julia/packages/Arrow/5pHqZ/src/eltypes.jl:273
┌ Warning: automatically converting Arrow.Timestamp with precision = NANOSECOND to `DateTime` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)
└ @ Arrow /Users/zijin/.julia/packages/Arrow/5pHqZ/src/eltypes.jl:273
using Associations
DEFAULT_ESTIMATOR = ChatterjeeCorrelation
DistanceCorrelation # very time consuming
function corr_test(df, xsym, ysym, zsym=missing; estimator = DEFAULT_ESTIMATOR, kwargs...)
info = string("x: ", xsym, " y: ", ysym)
x = df[!, xsym]
y = df[!, ysym]
if ismissing(zsym)
sum = association(estimator(; kwargs...), x, y)
else
info = string(info, " z: ", zsym)
z = df[!, zsym]
sum = association(estimator(; kwargs...), x, y, z)
end
return info, estimator, sum
end
corr_test (generic function with 2 methods)
function corr_test_l_j(df)
pairs = (:j0_k_norm, :L_k_norm), (:j0_k, :L_k)
log_pairs = (:j0_k_norm_log, :L_k_norm_log), (:j0_k_log, :L_k_log)
for (x, y) in pairs
corr_test(df, x, y; estimator = ChatterjeeCorrelation) |> println
end
for (x, y) in (pairs..., log_pairs...)
corr_test(df, x, y; estimator = PearsonCorrelation) |> println
end
end
println("Juno data")
corr_test_l_j(jno_df)
println("Wind data")
corr_test_l_j(wind_df)
Juno data
("x: j0_k_norm y: L_k_norm", ChatterjeeCorrelation, 0.469578733320144)
("x: j0_k y: L_k", ChatterjeeCorrelation, 0.4084058631919789)
("x: j0_k_norm y: L_k_norm", PearsonCorrelation, -0.013730210216596121)
("x: j0_k y: L_k", PearsonCorrelation, -0.024698316436114216)
("x: j0_k_norm_log y: L_k_norm_log", PearsonCorrelation, -0.845015541927149)
("x: j0_k_log y: L_k_log", PearsonCorrelation, -0.7979749527198012)
Wind data
("x: j0_k_norm y: L_k_norm", ChatterjeeCorrelation, 0.4843330252682627)
("x: j0_k y: L_k", ChatterjeeCorrelation, 0.43044080032864773)
("x: j0_k_norm y: L_k_norm", PearsonCorrelation, -0.002752478482145651)
("x: j0_k y: L_k", PearsonCorrelation, -0.00431558633673097)
("x: j0_k_norm_log y: L_k_norm_log", PearsonCorrelation, -0.8466345313192889)
("x: j0_k_log y: L_k_log", PearsonCorrelation, -0.8089190956799742)
Juno data
("x: j0_k_norm y: β", ChatterjeeCorrelation, 0.006972986251434499)
("x: j0_k y: β", ChatterjeeCorrelation, 0.0008875786583665279)
("x: L_k y: β", ChatterjeeCorrelation, 0.0014486692398745227)
("x: L_k_norm y: β", ChatterjeeCorrelation, 0.021791037531811774)
Wind data
("x: j0_k_norm y: β", ChatterjeeCorrelation, 0.0203298757692707)
("x: j0_k y: β", ChatterjeeCorrelation, 0.006315672406412509)
("x: L_k y: β", ChatterjeeCorrelation, -0.00022850771324800512)
("x: L_k_norm y: β", ChatterjeeCorrelation, 0.0016102421092075714)
function corr_test_τ(df; pairs=β_pairs, estimator = DEFAULT_ESTIMATOR)
τ = waiting_time(df)
v = norm.(eachrow(df[:, [:v_x, :v_y, :v_z]]), 2)
β_mean = (df[1:end-1, :β] + df[2:end, :β]) ./ 2
v_mean = (v[1:end-1] + v[2:end]) ./ 2
τ_β = association(estimator(), τ, β_mean)
τ_v = association(estimator(), τ, v_mean)
return :τ_β => τ_β, :τ_v => τ_v
end
wind_df_s1 = subset(wind_df, :time => t -> year.(t) .== 2013);
wind_df_s2 = subset(wind_df, :time => t -> month.(t) .< 2);
corr_test_τ(wind_df) |> println
corr_test_τ(jno_df) |> println
corr_test_τ(wind_df_s1) |> println
corr_test_τ(wind_df_s2) |> println
(:τ_β => 0.009166807640753949, :τ_v => 0.01218960529470925)
(:τ_β => 0.0003285049564558662, :τ_v => 0.014788469613764899)
(:τ_β => 0.01696468852507227, :τ_v => -4.855370740552978e-5)
(:τ_β => 0.006330369529195079, :τ_v => 0.010935013228649093)
using Dates
wind_day_df = @chain wind_df begin
@transform(:groupid = yearmonthday.(:time))
groupby(:groupid)
combine(
nrow => :n,
:v_x => mean ∘ ByRow(abs) => :v_mean,
)
@transform(:n = float(:n))
end
corr_test(wind_day_df, :n, :v_mean) |> println
corr_test(wind_day_df, :n, :v_mean; estimator = PearsonCorrelation) |> println
("x: n y: v_mean", ChatterjeeCorrelation, 0.002637848961297262)
("x: n y: v_mean", PearsonCorrelation, -0.12266454740087952)
┌ Info: Saved /Users/zijin/projects/ids_spatial_evolution_juno/figures/ocr/ocr_wind_daily_v.svg
└ @ Beforerr /Users/zijin/.julia/dev/Beforerr/src/utils/makie.jl:48
using Statistics
function plot_cor(df, cols; colormap=:RdBu, colorrange=(-1, 1))
M = cor(Matrix(df[!, cols])) # correlation matrix
(n, m) = size(M)
axis = (
xticks=(1:m, string.(cols)),
yticks=(1:m, string.(cols)),
)
fig, axis, plot = heatmap(M; axis=axis, colormap=colormap, colorrange=colorrange)
Colorbar(fig[1, 2], plot)
[text!(axis, "$(round(M[i,j],digits=3))", position=(i, j), align=(:center, :center)) for i in 1:n for j in 1:m]
fig
end
plot_cor (generic function with 1 method)