Association analysis

References: - Associations.jl

Notes:

Code
using DrWatson
@quickactivate
include(srcdir("main.jl"));
Code
wind_df = load_wind();
jno_df = load_jno();
┌ Warning: automatically converting Arrow.Timestamp with precision = MICROSECOND to `DateTime` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)
└ @ Arrow /Users/zijin/.julia/packages/Arrow/5pHqZ/src/eltypes.jl:273
┌ Warning: automatically converting Arrow.Timestamp with precision = NANOSECOND to `DateTime` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)
└ @ Arrow /Users/zijin/.julia/packages/Arrow/5pHqZ/src/eltypes.jl:273
Code
columns2log = [:L_k, :j0_k, :L_k_norm, :j0_k_norm, :β]
logize!(df, columns = columns2log) = transform!(df, columns .=> ByRow(log))

jno_df |> logize!;
wind_df |> logize!;
Code
using Associations

DEFAULT_ESTIMATOR = ChatterjeeCorrelation
DistanceCorrelation # very time consuming
function corr_test(df, xsym, ysym, zsym=missing; estimator = DEFAULT_ESTIMATOR, kwargs...)
    info = string("x: ", xsym, " y: ", ysym)

    x = df[!, xsym]
    y = df[!, ysym]
    if ismissing(zsym)
        sum = association(estimator(; kwargs...), x, y)
    else
        info = string(info, " z: ", zsym)
        z = df[!, zsym]
        sum = association(estimator(; kwargs...), x, y, z)
    end
    return info, estimator, sum
end
corr_test (generic function with 2 methods)
Code
function corr_test_l_j(df) 
    pairs = (:j0_k_norm, :L_k_norm), (:j0_k, :L_k)
    log_pairs = (:j0_k_norm_log, :L_k_norm_log), (:j0_k_log, :L_k_log)

    for (x, y) in pairs
        corr_test(df, x, y; estimator = ChatterjeeCorrelation) |> println
    end

    for (x, y) in (pairs..., log_pairs...)
        corr_test(df, x, y; estimator = PearsonCorrelation) |> println
    end
end

println("Juno data")
corr_test_l_j(jno_df)
println("Wind data")
corr_test_l_j(wind_df)
Juno data
("x: j0_k_norm y: L_k_norm", ChatterjeeCorrelation, 0.469578733320144)
("x: j0_k y: L_k", ChatterjeeCorrelation, 0.4084058631919789)
("x: j0_k_norm y: L_k_norm", PearsonCorrelation, -0.013730210216596121)
("x: j0_k y: L_k", PearsonCorrelation, -0.024698316436114216)
("x: j0_k_norm_log y: L_k_norm_log", PearsonCorrelation, -0.845015541927149)
("x: j0_k_log y: L_k_log", PearsonCorrelation, -0.7979749527198012)
Wind data
("x: j0_k_norm y: L_k_norm", ChatterjeeCorrelation, 0.4843330252682627)
("x: j0_k y: L_k", ChatterjeeCorrelation, 0.43044080032864773)
("x: j0_k_norm y: L_k_norm", PearsonCorrelation, -0.002752478482145651)
("x: j0_k y: L_k", PearsonCorrelation, -0.00431558633673097)
("x: j0_k_norm_log y: L_k_norm_log", PearsonCorrelation, -0.8466345313192889)
("x: j0_k_log y: L_k_log", PearsonCorrelation, -0.8089190956799742)
Test dependence on β
β_pairs = (:j0_k_norm, :β), (:j0_k, :β), (:L_k, :β), (:L_k_norm, :β)
function corr_test_β(df; pairs=β_pairs)
    for (x, y) in pairs
        corr_test(df, x, y) |> println
    end
end

println("Juno data")
corr_test_β(jno_df)
println("Wind data")
corr_test_β(wind_df)
Juno data
("x: j0_k_norm y: β", ChatterjeeCorrelation, 0.006972986251434499)
("x: j0_k y: β", ChatterjeeCorrelation, 0.0008875786583665279)
("x: L_k y: β", ChatterjeeCorrelation, 0.0014486692398745227)
("x: L_k_norm y: β", ChatterjeeCorrelation, 0.021791037531811774)
Wind data
("x: j0_k_norm y: β", ChatterjeeCorrelation, 0.0203298757692707)
("x: j0_k y: β", ChatterjeeCorrelation, 0.006315672406412509)
("x: L_k y: β", ChatterjeeCorrelation, -0.00022850771324800512)
("x: L_k_norm y: β", ChatterjeeCorrelation, 0.0016102421092075714)
Test waiting time dependence
function corr_test_τ(df; pairs=β_pairs, estimator = DEFAULT_ESTIMATOR)
    τ = waiting_time(df)
    v = norm.(eachrow(df[:, [:v_x, :v_y, :v_z]]), 2)
    β_mean = (df[1:end-1, :β] + df[2:end, :β]) ./ 2
    v_mean = (v[1:end-1] + v[2:end]) ./ 2
    
    τ_β = association(estimator(), τ, β_mean)
    τ_v = association(estimator(), τ, v_mean)
    return :τ_β => τ_β, :τ_v => τ_v
end

wind_df_s1 = subset(wind_df, :time => t -> year.(t) .== 2013);
wind_df_s2 = subset(wind_df, :time => t -> month.(t) .< 2);

corr_test_τ(wind_df) |> println
corr_test_τ(jno_df) |> println
corr_test_τ(wind_df_s1) |> println
corr_test_τ(wind_df_s2) |> println
(:τ_β => 0.009166807640753949, :τ_v => 0.01218960529470925)
(:τ_β => 0.0003285049564558662, :τ_v => 0.014788469613764899)
(:τ_β => 0.01696468852507227, :τ_v => -4.855370740552978e-5)
(:τ_β => 0.006330369529195079, :τ_v => 0.010935013228649093)
Code
using Dates

wind_day_df = @chain wind_df begin
    @transform(:groupid = yearmonthday.(:time))
    groupby(:groupid)
    combine(
        nrow => :n,
        :v_x => mean  ByRow(abs) => :v_mean,
    )
    @transform(:n = float(:n))
end

corr_test(wind_day_df, :n, :v_mean) |> println
corr_test(wind_day_df, :n, :v_mean; estimator = PearsonCorrelation) |> println
("x: n y: v_mean", ChatterjeeCorrelation, 0.002637848961297262)
("x: n y: v_mean", PearsonCorrelation, -0.12266454740087952)
Code
# Distribution of daily averages of DD per day over v with the fit of an linear function.
specs = data(wind_day_df) * mapping(:v_mean, :n) * (visual(Scatter) + AlgebraOfGraphics.linear())
draw(specs)
easy_save("ocr/ocr_wind_daily_v")
┌ Info: Saved /Users/zijin/projects/ids_spatial_evolution_juno/figures/ocr/ocr_wind_daily_v.svg
└ @ Beforerr /Users/zijin/.julia/dev/Beforerr/src/utils/makie.jl:48

Correlation

Code
using Statistics

function plot_cor(df, cols; colormap=:RdBu, colorrange=(-1, 1))
    M = cor(Matrix(df[!, cols]))       # correlation matrix

    (n, m) = size(M)
    axis = (
        xticks=(1:m, string.(cols)),
        yticks=(1:m, string.(cols)),
    )
    fig, axis, plot = heatmap(M; axis=axis, colormap=colormap, colorrange=colorrange)
    Colorbar(fig[1, 2], plot)
    [text!(axis, "$(round(M[i,j],digits=3))", position=(i, j), align=(:center, :center)) for i in 1:n for j in 1:m]
    fig
end
plot_cor (generic function with 1 method)
Code
plot_cor(jno_df, [:r, :β, :L_k_norm, :j0_k_norm, :L_k, :j0_k]) |> display
plot_cor(jno_df, [:r, :β, :L_k_log, :j0_k_log]) |> display
plot_cor(jno_df, [:r, :β, :L_k_norm_log, :j0_k_norm_log])