stats

In [0]:

using Dates
using StatsBase, Statistics
using PSPAnisotropy
import Discontinuity as DC
using Discontinuity: var_mapping, compute_params!, filter_low_mva_quality
using Discontinuity: compute_Alfvenicity_params!, Alfven_velocity
using DimensionalData
using DataFrames, DataFramesMeta
using CairoMakie
using Beforerr: _theme_legend, easy_save, hide_facet_labels!, add_labels!
using Beforerr
using CategoricalArrays
using AlgebraOfGraphics
using AlgebraOfGraphics: data
import AlgebraOfGraphics as AoG
using LinearAlgebra
using Accessors
using Unitful
Unitful.preferunits(u"nA", u"km")

includet("../src/plot.jl")

# https://github.com/MakieOrg/AlgebraOfGraphics.jl/issues/464
# foreach([:log10, :log2, :log]) do f
# @eval import Base: $f
# @eval $f(x::Unitful.Quantity) = $f(ustrip(x))
# end
set_Z_theme!()

In [0]:

𝐧 = :mva
𝒎 = var_mapping(; 𝐧)

taus = Second.(2 .^ (1:6))
# taus = Second.(2 .^ (0:6))
wdf = workload(taus)

_float64(x) = passmissing(Float64)(x)

A_lens = @optic _.model.A

df = @chain wdf begin
    DC.compute_Alfvenicity_params!()
    DC.compute_params!()
    @transform @astable begin
        :Δ𝐁 = @. norm(:B_us - :B_ds)
        :Δ𝐁_fit = @. 2 * passmissing(A_lens)(:model) |> abs
        :Δ𝐁_norm = :Δ𝐁 ./ :B_mag
        :Δ𝐁_fit_norm = :Δ𝐁_fit ./ :B_mag
        :B_n_mva_norm = abs.(:B_n_mva ./ :B_mag)
        :duration = _float64.(ustrip.(:duration .|> u"s"))
        :L_n_mva = _float64.(ustrip.(:L_n_mva))
        :J_m_max_mva = _float64.(ustrip.(:J_m_max_mva))
        :L_n_cross = _float64.(ustrip.(:L_n_cross))
        :J_m_max_cross = _float64.(ustrip.(:J_m_max_cross))
    end
    DC.classify!(:B_n_mva_norm)
end

# Count the number of events for each type
df_event_counts = @chain df begin
    groupby(:type)
    combine(nrow)
end

thm_df = @rsubset(df, :id == "THEMIS"; view=true)
thm_df.id .= "ARTEMIS"
df.id = categorical(df.id)
levels!(df.id, ["PSP", "ARTEMIS", "Wind"])

subset_𝐧(df, 𝐧) = 𝐧 == :mva ? filter_low_mva_quality(df) : df
const datalimits = x -> quantile(x, [0.01, 0.99])

Associations and independence test

In [0]:


println("\n" * "="^60)
println("ASSOCIATION ANALYSIS FOR ALFVÉNICITY")
println("="^60)

# Define the variable pairs you want to analyze with transformations embedded
alfvenicity_pairs = [
    (:duration, :Q_sonnerup => abs, "Duration vs Alfvénicity (Q_sonnerup)"),
    (:duration, :V_l_ratio, "Duration vs Velocity Ratio (V_l_ratio)"),
    (:B_n_mva_norm, :Q_sonnerup => abs, "Magnetic Field Normal vs Alfvénicity"),
    (:σ_c, :Q_sonnerup, "Cross helicity vs Alfvénicity"),
    (:σ_r, :Q_sonnerup),
    (:β, :Q_sonnerup => abs),
    (:A_He => x -> ismissing(x) ? missing : only(x), :Q_sonnerup => abs, "Helium Abundance vs Alfvénicity"),
    (:J_m_max_mva_norm, :Q_sonnerup => abs, "Current density vs Alfvénicity"),
    (:L_n_mva_norm, :Q_sonnerup => abs, "Thickness vs Alfvénicity"),
    (:A_He => x -> ismissing(x) ? missing : only(x), :J_m_max_mva_norm),
    (:A_He => x -> ismissing(x) ? missing : only(x), :L_n_mva_norm),
    (:β, :J_m_max_mva_norm),
    (:β, :L_n_mva_norm),
    (:β, :A_He),
]

# Run overall analysis
println("OVERALL ASSOCIATION ANALYSIS")
println("="^50)
analyze_associations(df, alfvenicity_pairs)

# Grouped analysis
# println("GROUPED ASSOCIATION ANALYSIS (by $(group_by))")
println("="^50)
analyze_associations(df, alfvenicity_pairs; group_by=:id)

Association Measure Interpretation:
Values range from 0 (no association) to 1 (perfect association):
- 0.0 - 0.1: Very weak association
- 0.1 - 0.3: Weak association  
- 0.3 - 0.5: Moderate association
- 0.5 - 0.7: Strong association
- 0.7 - 1.0: Very strong association

Association Methods Used:
1. Distance Correlation: Non-parametric measure detecting linear & nonlinear dependencies
2. Pearson Correlation: Standard linear correlation coefficient  
3. Spearman Correlation: Rank-based correlation (monotonic relationships)
4. Chatterjee Correlation: New robust measure for general dependencies
5. Mutual Information: Information-theoretic measure of shared information

Note: These are association strengths, not statistical significance tests.
For independence testing, use with caution - consider both effect size and sample size.


🔎 Step 1. General observations
    •   Cross helicity vs Alfvénicity absolutely dominates:
    •   Pearson = 0.84 overall (0.92 in PSP, 0.76 in Wind).
    •   Chatterjee = 0.47–0.53.
    •   MI = 0.58–0.97.
→ This is a very strong dependence by any metric, which is expected since Alfvénicity is conceptually related to cross helicity.
    •   Magnetic field normal vs Alfvénicity shows weak-to-moderate positive linear correlation (~0.17–0.19), but Chatterjee and MI stay low. Suggests a weak linear trend, not a strong monotonic or nonlinear relationship.
    •   Σ_r vs Q_sonnerup:
    •   Small negative Pearson (~ -0.1).
    •   But Chatterjee 0.09–0.15 and MI up to 0.24 in PSP → weak, possibly nonlinear dependence.
    •   This might indicate subtle structure missed by linear correlation.
    •   Duration, velocity ratio, thickness, helium abundance, current density:
    •   All show weak or negligible correlations (|Pearson| < 0.25, Chatterjee mostly < 0.06, MI close to 0).
    •   Suggests near independence, with only minor dataset-specific effects (e.g., PSP helium abundance at -0.24).

⸻

🔎 Step 2. Dataset comparison (PSP vs Wind)
    •   Cross helicity vs Alfvénicity is strong in both spacecraft, but even stronger in PSP (r ~ 0.92 vs 0.76).
    •   Magnetic field normal vs Alfvénicity is similar across PSP/Wind (r ~ 0.17–0.18).
    •   Σ_r vs Q_sonnerup is stronger in PSP (MI ~0.24 vs 0.05 in Wind).
    •   Helium abundance vs Alfvénicity:
    •   PSP: moderate negative Pearson (-0.24).
    •   Wind: near zero.
→ Suggests PSP-specific dependence (possibly tied to solar wind composition closer to the Sun).
    •   The rest (duration, velocity ratio, thickness, current density) are consistently weak in both datasets.

⸻

🔎 Step 3. Practical interpretation
    1.  Cross helicity is the defining feature of Alfvénicity.
The high correlations confirm that your Alfvénicity measure (Q_sonnerup) essentially tracks cross helicity. This is expected, but the strength of correlation (0.8–0.9) validates consistency across datasets.
    2.  Other plasma/structural properties are only weakly linked.
Magnetic field orientation (normal) shows a weak positive association, but not much more.
    3.  Σ_r (residual energy measure?) has a modest but interesting nonlinear association with Alfvénicity, especially in PSP. Could hint at a subtle dependence worth exploring with scatter plots or conditional analysis.
    4.  Helium abundance only matters in PSP.
The -0.24 Pearson suggests Alfvénicity tends to be lower in He-rich regions, but this trend does not appear in Wind. That could reflect heliocentric distance effects.

⸻

✅ Summary of main findings:
    •   Strong dependence: Cross helicity ↔ Alfvénicity (core relationship).
    •   Weak but consistent: Magnetic field normal, Σ_r (especially in PSP).
    •   Dataset-specific: Helium abundance (negative in PSP, absent in Wind).
    •   Effectively independent: Duration, velocity ratio, thickness, current density.

⸻

Would you like me to help you visualize these dependencies (scatter plots, density plots, MI heatmaps) to confirm the weak vs strong associations, or do you just need a written interpretation for now?

Thickness and current density

Density distributions of (normalized) thickness and current density

In [0]:

fname = "properties_hist"

update_theme!(;
    Legend=(; framevisible=false, padding=(0, 0, 0, 0)),
)

fs = map([:mva, :cross]) do 𝐧
    plot_properties_hist(df, 𝐧)
    # subset df by plasma beta
    # plot_properties_hist(@rsubset(df, :β < 1), 𝐧)
    # plot_properties_hist(@rsubset(df, :β > 3), 𝐧)
    easy_save("$fname-$𝐧"; force=true)
    # easy_save("$fname-$𝐧")
end
fs[1]

Duration (temporal thickness)

Now we analyze what is the distribution of the duration of the current sheet, and how does it vary with the different tau we use.

Plot duration distributions for different missions and time periods.

In [0]:

let tdf = dropmissing(df, :duration)
    psp_df = @chain tdf begin
        @rsubset(:id == "PSP"; view=true)
        @transform(:duration = :duration .* 4, :id = "PSP (4x)")
    end
    fdf = vcat(tdf, psp_df)
    axis = (; yscale=log10)
    layer = mapping(:duration) * AoG.density(; datalimits=x -> quantile(x, [0.01, 0.99])) * visual(Lines)
    plt = data(fdf) * mapping(color=:id) * layer
    plt = plt + mapping(2 .^ (0:5)) * visual(VLines, color=:gray, linestyle=:dash)
    fg = draw(plt; axis)

    easy_save("duration_dist"; force=true)
end

Plot duration distributions faceted by different taus:

In [0]:

let df = dropmissing(df, :duration)
    layer = AoG.density(; datalimits=x -> quantile(x, [0.02, 0.99])) * visual(Lines)
    plt = data(df) * mapping(:duration, linestyle=:enc, color=:id) * layer
    plt *= mapping(layout=:tau)
    draw(plt, axis=(; yscale=log10), facet=(; linkxaxes=:none, linkyaxes=:none))
    easy_save("duration_dist_tau")
end

Alfvenicity

Notes: V_l_ratio_max and ΔV_ratio are pretty much similar

In [0]:

update_theme!(;
    Legend=(; framevisible=false, padding=(0, 0, -16, 0)),
)
let
    𝒎 = var_mapping(; 𝐧)
    datalimits = x -> quantile(x, [0.01, 0.99])
    tdf = @chain df begin
        dropmissing(:V_l_ratio)
        @subset(.!isnan.(:V_l_ratio) .&& .!isnan.(:ΔV_cosθ); view=true)
        subset_𝐧(𝐧)
    end

    ΔV_cosθ = :ΔV_cosθ => identity => L"\cos\;θ"
    ΔV_ratio = :ΔV_ratio => identity => L"|Δ𝐕| / |Δ𝐕_A|"
    dB_over_B = :dB_over_B => log10 => L"\text{Log } ΔB / ⟨B\;⟩"
    dB_max_over_B = :dB_max_over_B => log10 => L"\text{Log } ΔB_{\text{max}} / ⟨B\;⟩"
    Q_sonnerup = :Q_sonnerup => abs => ""

    spec = AoG.data(tdf) * mapping(color=:id) *
           AoG.density(; datalimits) * visual(Lines)

    fig = Figure()
    ga = draw!(fig[1, 1], spec * mapping(ΔV_ratio))
    gb = draw!(fig[1, 2], spec * mapping(ΔV_cosθ))
    gc = draw!(fig[2, 1], spec * mapping(dB_over_B))
    gd = draw!(fig[2, 2], spec * mapping(dB_max_over_B))
    add_labels!(fig; valign=:top, position=Left(), padding=(0, 37, 0, 0))
    display(fig)
    pretty_legend!(fig, ga)
    easy_save("Alfvenicities"; force=true)
    # vars = [ΔV_cosθ, ΔV_ratio, dB_over_B, dB_max_over_B]
    # plt = spec * mapping(vars, layout=AoG.dims(1) => renamer(labels))
    # draw(plt, axis=(; yscale=log10), facet=(; linkyaxes=:minimal))
end

Joint distributions

Thickness and current density

In [0]:

function alpha_layer(df, alpha)
    psp_df = @rsubset(df, :id == "PSP"; view=true)
    thm_df = @rsubset(df, :id ∈ ("ARTEMIS", "THEMIS"); view=true)
    wind_df = @rsubset(df, :id == "Wind"; view=true)
    AoG.data(psp_df) + (AoG.data(thm_df) + AoG.data(wind_df) * visual(; alpha))
end

In [0]:

for 𝐧 in [:mva, :cross]
    view = true
    𝒎 = var_mapping(; 𝐧)

    l_sym = first(𝒎.l_norm)

    tdf = @chain df begin
        dropmissing(l_sym)
        subset_𝐧(𝐧)
    end

    plt = alpha_layer(tdf, 0.3) *
          visual(Scatter; markersize=6, legend=(; alpha=0.6)) *
          mapping(color=:id, marker=:id, layout=:id => layout_func)

    fig = Figure(; size=(400, 800))
    axis = (; yscale=log10, xscale=log10)

    spec1 = plt * mapping(𝒎.l, 𝒎.j)
    spec2 = plt * mapping(𝒎.l_norm, 𝒎.j_norm)
    B_l_spec = plt * mapping(𝒎.l_norm, 𝒎.Δ𝐁_norm) # Not strong correlation
    B_j_spec = plt * mapping(𝒎.j_norm, 𝒎.Δ𝐁_norm) # Not strong correlation

    d_lims = (10^(-2.5), 10^3.5)
    j_lims = (10^(-3.6), 10^2.4)
    B_lims = (10^(-3), 10^3)

    grid1 = draw!(fig[1, 1], spec1; axis)
    grid2 = draw!(fig[2, 1], spec2; axis=(; axis..., limits=(d_lims, j_lims)))
    grid3 = draw!(fig[3, 1], B_l_spec; axis=(; axis..., limits=(d_lims, nothing)))
    grid4 = draw!(fig[4, 1], B_j_spec; axis=(; axis..., limits=(j_lims, nothing)))

    hide_facet_labels!.((grid2, grid3, grid4))
    rowgap!(fig.layout, 4)

    add_labels!([fig[1, 1], fig[2, 1], fig[3, 1], fig[4, 1]]; position=Left(), padding=(0, 37, 0, 0))
    display(fig)
    easy_save("joint_properties-$𝐧"; force=true)
    # easy_save("joint_properties-$𝐧")
end

In-plane rotation angle, Bn

and

In [0]:

for 𝐧 in [:mva, :cross], subset in (true)
    normalization = :column
    𝒎 = var_mapping(; 𝐧)
    B_sym = Symbol(:B_n_, 𝐧)
    tdf = dropmissing(df, B_sym)
    # tdf = subset ? subset_𝐧(tdf, 𝐧) : tdf
    tdf = subset_𝐧(tdf, 𝐧)
    plt = AoG.data(tdf) * mapping(col=:id => layout_func)
    B_n_ω_spec = plt * mapping(𝒎.bn, 𝒎.ω_in) * fhist(; binedges=(0:0.03:0.99, 0:6:180), normalization)
    f = draw(B_n_ω_spec)
    colgap!(f.figure.layout, 10)

    den_base = AoG.data(tdf) * mapping(color=:id => layout_func) * AoG.density(; datalimits) * visual(Lines)
    ff = f.figure[0, 1:3]
    grid1 = draw!(ff[1, 1], den_base * mapping(𝒎.ω_in))
    grid2 = draw!(ff[1, 2], den_base * mapping(𝒎.bn); axis=(; ylabel="", yscale=log10))
    legend!(ff[1, 1], grid1; tellheight=false, tellwidth=false, halign=:right, valign=:top)
    rowgap!(f.figure.layout, 4)
    add_labels!([ff[1, 1], ff[1, 2], f.figure[1, 1:end]]; position=Left(), padding=(0, 40, 0, -5))
    easy_save("B_n_ω_-$𝐧-subset=$subset"; force=true)
    # easy_save("B_n_ω_-$𝐧-subset=$subset")
end

and

In [0]:

for 𝐧 in [:mva], subset in (true, false)
    normalization = :column
    𝒎 = var_mapping(; 𝐧)
    tdf = dropmissing(df, :B_n_mva_norm)
    tdf = subset ? subset_𝐧(tdf, 𝐧) : tdf
    plt = AoG.data(tdf) * mapping(col=:id => layout_func)
    duration_spec = plt * mapping(:duration, 𝒎.ω_in)
    f = draw(duration_spec)
    display(f)
end

β vs A_He

There are two populations for beta and alpha particles abundance, corresponding to different sources of solubility. Correlation exist.

In [0]:

for 𝐧 in [:mva]
    view = true
    𝒎 = var_mapping(; 𝐧)

    tdf = @chain df begin
        @rsubset(all(!isnan, (:B_n_mva_norm, :Q_sonnerup, :σ_c, :ΔV_ratio, :ΔV_cosθ)); view=true)
        dropmissing([:B_n_mva_norm, :duration, :σ_c])
        @transform!(:duration = Float64.(:duration))
        subset_𝐧(𝐧)
    end

    β_A_He_df = @chain df begin
        @rsubset(all(!isnan, (:β, :A_He)); view=true)
        dropmissing([:β, :A_He])
        @transform!(:log_β = log10.(:β))
        @rsubset(:A_He < 20, :log_β > -1.5)
    end


    layout = mapping(row=:id, col=:enc)

    plt = data(tdf) * layout
    vis = visual(Scatter; markersize=6)
    fig = Figure(; size=(1200, 800))
    axis = (;)


    ΔV_cosθ_group = :ΔV_cosθ => >(0)
    ΔV_cosθ_map = mapping(group=ΔV_cosθ_group, color=ΔV_cosθ_group)

    ΔV_spec = plt * mapping(:ΔV_ratio, :ΔV_cosθ) * (vis + AoG.linear() * ΔV_cosθ_map)
    σc_σr_spec = plt * mapping(:σ_c, :σ_r) * (vis + AoG.smooth() * visual(color=:red))

    A_He_group = mapping(group=:A_He => >(1.5), color=:A_He => >(1.5))

    β_A_He_spec = data(β_A_He_df) * layout * mapping(:log_β, :A_He) * (vis + AoG.smooth(; degree=1) * A_He_group)

    draw!(fig[1, 2], ΔV_spec; axis=(; limits=((0, 2), (-1, 1),)))
    draw!(fig[2, 1], σc_σr_spec)
    draw!(fig[2, 2], β_A_He_spec)

    # pretty_legend!(fig, grid1)
    # add_labels!([ga, gb, gc])
    display(fig)
end

Q_sonnerup and other variables

In [0]:

𝒎 = var_mapping(; 𝐧)

fig = Figure(; size=(500, 800))
let vars = [:Q_sonnerup, :duration, :σ_c]
    tdf = @chain df begin
        dropmissing(vars)
        @rsubset(all(!isnan, (:ΔV_cosθ, :Q_sonnerup, :σ_c)); view=true)
    end
    plt = AoG.data(tdf) * mapping(col=:id => layout_func)

    Q_limit = (-0.992, 0.992) # mainly for plotting so not show (-1, 1) in ticks
    ΔV_spec = plt * mapping(𝒎.ΔV_ratio, 𝒎.ΔV_cosθ) * AoG.density(; datalimits=((0, 1.1), (-1, 1)))
    # duration_Q_spec = plt * mapping(:duration, 𝒎.Q) * AoG.density(; datalimits=((0, 3.1), Q_limit))
    duration_Q_spec = plt * mapping(:duration => log10 => "Log Duration (s)", 𝒎.Q) * AoG.density(; datalimits=((-0.9, 1.1), Q_limit))
    # duration_Q_spec = plt * mapping(:duration => log10, 𝒎.Q) * fhist(; binedges=(-0.9:0.1:1.1, -0.99:0.1:0.99), normalization=:column)
    σc_Q_spec = plt * mapping(:σ_c => L"σ_c", 𝒎.Q) * AoG.density()
    σr_Q_spec = plt * mapping(:σ_r => L"σ_r", 𝒎.Q) * AoG.density(; datalimits=(Q_limit, Q_limit))

    ff11, ff12, ff21, ff22 = fig[1, 1], fig[2, 1], fig[3, 1], fig[4, 1]
    grid1 = cdraw!(ff11, ΔV_spec)
    grid3 = cdraw!(ff12, σc_Q_spec)
    grid4 = cdraw!(ff21, σr_Q_spec)
    grid2 = cdraw!(ff22, duration_Q_spec)

    for f in (ff11, ff12, ff21, ff22)
        colgap!(Makie.content(f[1, 1]), 6)
    end

    rowgap!(fig.layout, 4)
    for g in (grid3, grid4)
        ax = g[1].axis
        ax.xlabelpadding[] = 1
        ax.xlabelsize[] = 16
    end
    add_labels!([fig[1, 1], fig[2, 1], fig[3, 1], fig[4, 1]]; position=Left(), padding=(0, 0, 0, -5))
    hide_facet_labels!.((grid2, grid3, grid4))
    easy_save("Q_sonnerup_joint_dist_den"; force=true)
end

Q_sonnerup vs B_n

Two populations for Q_sonnerup vs B_n: large B_n has larger Q_sonnerup. No strong correlation for small B_n groups.

In [0]:

let 𝐧 = :mva
    𝒎 = var_mapping(; 𝐧)
    Bn_df = @chain df begin
        @rsubset(all(isfinite, (:B_n_mva_norm, :Q_sonnerup)); view=true)
        subset_𝐧(𝐧)
    end
    Bn_Q_spec = data(Bn_df) * mapping(𝒎.bn, :Q_sonnerup => L"Q", col=:id => layout_func) * AoG.density()
    draw(Bn_Q_spec)
end

Q_sonnerup vs A_He

We observe very weak correlation for PSP when \(A_{He}<1\), and no correlation for Wind.

In [0]:

let
    𝒎 = var_mapping()
    A_He_df = @rsubset(df, :A_He > 0, isfinite(:Q_sonnerup))
    A_He_Q_spec = data(A_He_df) * mapping(:A_He, :Q_sonnerup, col=:id => layout_func) * AoG.density(; datalimits=((0, 5), (-1, 1)))
    draw(A_He_Q_spec)
    easy_save("A_He_Q_sonnerup")
end

Q_sonnerup vs β

No obvious correlation for PSP and Wind .

In [0]:

let
    β_df = @rsubset(df, all(isfinite, (:β, :Q_sonnerup)); view=true)
    layout = mapping(row=:id)
    β_Q_spec = data(β_df) * mapping(:β => log10, :Q_sonnerup) * layout * AoG.density(; datalimits=((-1, 1), (-1, 1)))
    draw(β_Q_spec)
    easy_save("β_Q_sonnerup")
end

Duration vs Alfvénicity Correlation

Let’s analyze the relationship between duration and Alfvénicity to see if they are correlated.

In [0]:

using HypothesisTests
using NaNStatistics
using PairPlots

# Filter for clean data with both duration and Alfvénicity measures
vars = [:duration, :Q_sonnerup, :V_l_ratio, :B_n_mva_norm]

df_corr = @chain df begin
    filter_low_mva_quality()
    dropmissing(vars)
    @transform!(:duration = Float64.(ustrip.(:duration .|> u"s")))
    @rsubset(all(!isnan, (:Q_sonnerup, :V_l_ratio, :duration, :σ_c)))
end

# Overall correlation matrix
println("Overall correlation matrix:")
cor_vars = [:Q_sonnerup, :duration, :B_n_mva_norm, :σ_c, :σ_r, :L_n_mva_norm, :J_m_max_mva_norm]
_cor_func = df -> nancor(Matrix(df[!, cor_vars]))
overall_corr = _cor_func(df_corr)
display(overall_corr)
# # Statistical significance tests
# test_q = CorrelationTest(df_corr.duration_s, df_corr.Q_sonnerup)
# test_vl = CorrelationTest(df_corr.duration_s, collect(skipmissing(df_corr.V_l_ratio)))

# println("Statistical significance:")
# println("Duration vs Q_sonnerup: p-value = $(round(pvalue(test_q), digits=6))")
# println("Duration vs V_l_ratio: p-value = $(round(pvalue(test_vl), digits=6))")

Correlation Strength: - Duration vs Q_sonnerup: r = -0.0253 (very weak negative correlation) - Duration vs V_l_ratio: r = -0.0134 (very weak negative correlation)

Statistical Significance: - Duration vs Q_sonnerup: p = 0.009 (statistically significant, p < 0.05) - Duration vs V_l_ratio: p = 0.166 (not statistically significant, p > 0.05)

Interpretation: 1. There is a very weak negative correlation between duration and Alfvénicity - meaning longer events tend to be slightly less Alfvénic 2. The effect size is tiny (correlations ~-0.02 to -0.03) - practically negligible

Conclusion: While there’s a statistically significant but extremely weak tendency for longer-duration current sheets to be slightly less Alfvénic, the correlation is so small it’s essentially negligible in practical terms. Duration and Alfvénicity are largely independent of each other in this dataset.

Unused Code

In [0]:

# Visualization: Scatter plot with trend line
f_corr = Figure(size=(1400, 600))

attrs = (alpha=0.6, markersize=6)

# Duration vs Q_sonnerup
ax1 = Axis(f_corr[1, 1],
    xscale=log10,
    xlabel="Duration (seconds)",
    ylabel="Q_sonnerup (Alfvénicity)",
    title="Duration vs Alfvénicity")

scatter!(ax1, df_corr.duration_s, abs.(df_corr.Q_sonnerup); color=:blue, attrs...)

# Add trend line (remove any remaining NaNs)
valid_idx = .!isnan.(df_corr.duration_s) .& .!isnan.(df_corr.Q_sonnerup)
if sum(valid_idx) > 0
    x_valid = df_corr.duration_s[valid_idx]
    y_valid = abs.(df_corr.Q_sonnerup[valid_idx])
    x_range = range(minimum(x_valid), maximum(x_valid), length=100)
    X = hcat(ones(length(x_valid)), x_valid)
    β = X \ y_valid
    trend_y = β[1] .+ β[2] .* x_range
    lines!(ax1, x_range, trend_y, color=:red, linewidth=2, linestyle=:dash)
end

# text!(ax1, 0.05, 0.95, "r = $(round(corr_duration_q, digits=4))", space=:relative, fontsize=12, color=:black)

# Duration vs V_l_ratio  
df_vl = dropmissing(df_corr, :V_l_ratio)
ax2 = Axis(f_corr[1, 2],
    xscale=log10,
    xlabel="Duration (seconds)",
    ylabel="V_l_ratio",
    title="Duration vs Velocity Ratio")

scatter!(ax2, df_vl.duration_s, df_vl.V_l_ratio,
    color=:green, alpha=0.6, markersize=6)

# Add trend line for V_l_ratio (remove any remaining NaNs)
valid_idx2 = .!isnan.(df_vl.duration_s) .& .!isnan.(df_vl.V_l_ratio)
if sum(valid_idx2) > 0
    x_valid2 = df_vl.duration_s[valid_idx2]
    y_valid2 = df_vl.V_l_ratio[valid_idx2]
    x_range2 = range(minimum(x_valid2), maximum(x_valid2), length=100)
    X2 = hcat(ones(length(x_valid2)), x_valid2)
    β2 = X2 \ y_valid2
    trend_y2 = β2[1] .+ β2[2] .* x_range2
    lines!(ax2, x_range2, trend_y2, color=:red, linewidth=2, linestyle=:dash)
end
ylims!(ax2, 0, 1)
# text!(ax2, 0.05, 0.95, "r = $(round(corr_duration_vl, digits=4))", space=:relative, fontsize=12, color=:black)

f_corr

In [0]:

using CategoricalArrays
using NaNStatistics
# Binned analysis
duration_bins = [0, 2, 5, 10, 20, 50, maximum(df_corr.duration_s)]
df_binned = @transform(df_corr,
    :duration_bin = cut(:duration_s, duration_bins, labels=["0-2s", "2-5s", "5-10s", "10-20s", "20-50s", ">50s"]))

binned_stats = @chain df_binned begin
    dropmissing([:Q_sonnerup, :V_l_ratio])
    groupby(:duration_bin)
    @combine(
        :count = length(:duration_s),
        :mean_duration = nanmean(:duration_s),
        :mean_Q_sonnerup = nanmean(abs.(:Q_sonnerup)),
        :std_Q_sonnerup = nanstd(abs.(:Q_sonnerup)),
        :mean_V_l_ratio = nanmean(:V_l_ratio),
        :std_V_l_ratio = nanstd(:V_l_ratio)
    )
end

println("Mean Alfvénicity by Duration Bins:")
select(binned_stats, :duration_bin, :count, :mean_Q_sonnerup, :mean_V_l_ratio)

In [0]:

# Add binned averages to the scatter plots
for (i, row) in enumerate(eachrow(binned_stats))
    if i < nrow(binned_stats)
        # Get bin boundaries
        bin_start = duration_bins[i]
        bin_end = duration_bins[i+1]
        bin_center = (bin_start + bin_end) / 2

        # Add mean points with error bars to Q_sonnerup plot
        scatter!(ax1, [bin_center], [row.mean_Q_sonnerup],
            color=:red, markersize=12, marker=:diamond)
        errorbars!(ax1, [bin_center], [row.mean_Q_sonnerup], [row.std_Q_sonnerup],
            color=:red, whiskerwidth=8, linewidth=2)

        # Add mean points with error bars to V_l_ratio plot  
        scatter!(ax2, [bin_center], [row.mean_V_l_ratio],
            color=:red, markersize=12, marker=:diamond)
        errorbars!(ax2, [bin_center], [row.mean_V_l_ratio], [row.std_V_l_ratio],
            color=:red, whiskerwidth=8, linewidth=2)
    else
        # Handle the last bin (>50s) - use a representative position
        bin_center = 60  # Approximate position for >50s bin

        scatter!(ax1, [bin_center], [row.mean_Q_sonnerup],
            color=:red, markersize=12, marker=:diamond)
        errorbars!(ax1, [bin_center], [row.mean_Q_sonnerup], [row.std_Q_sonnerup],
            color=:red, whiskerwidth=8, linewidth=2)

        scatter!(ax2, [bin_center], [row.mean_V_l_ratio],
            color=:red, markersize=12, marker=:diamond)
        errorbars!(ax2, [bin_center], [row.mean_V_l_ratio], [row.std_V_l_ratio],
            color=:red, whiskerwidth=8, linewidth=2)
    end
end

# Add legend explaining the red diamonds
Legend(f_corr[2, :], [MarkerElement(color=:red, marker=:diamond, markersize=12)],
    ["Binned Averages ± Std"], orientation=:horizontal, tellwidth=false, tellheight=true)

f_corr

Current sheet statistics

Current sheet statistics

Associations and independence test

Thickness and current density

Duration (temporal thickness)

Alfvenicity

Joint distributions

Thickness and current density

In-plane rotation angle, Bn

β vs A_He

Q_sonnerup and other variables

Q_sonnerup vs B_n

Q_sonnerup vs A_He

Q_sonnerup vs β

Duration vs Alfvénicity Correlation

Unused Code