Waiting time analysis

Archived notebook using Python and R

Notes

  • Occurence rate is related to the waiting time distribution.
  • Group the data by the radial distance for WT analysis is not good as the radial distance is not a monotonic function of time.

It is hard to remove duplicate events when combining multiple datasets with different τ. So when using the combined dataset, the occurence rate would be overestimated.

An analysiswas carried out to determine the probability distribution governing the time interval between successive discontinuities. The times at which the discontinuities occurred,obtainedfromthe identificationprogram, were used to computethe time difference, \(τ = T_j - T_{j-1}\), the so-called interarrival interval. The number of cases were then tabulated corresponding to discreteranges of τ, and a histogramwas prepared.By properly normalizing the numberof casesin each range a probability distribution function giving the relative frequency of occurrencewas obtained. A similar study was previously carried out by Burlaga[1969].

References

\[ f(x; \alpha, \theta) = \frac{\alpha}{\theta} \left( \frac{x}{\theta} \right)^{\alpha-1} e^{-(x/\theta)^\alpha}, \quad x \ge 0 \]

Code
using DrWatson
@quickactivate
include(srcdir("main.jl"))

using CairoMakie
Code
# j_events_taus = load_taus(60:-10:20);
wind_df = load_wind();
# jno_df = load(datadir("updated_events_JNO_method=fit_tau=0:01:00_ts=0:00:01.arrow"));
jno_df = load_jno();
┌ Warning: automatically converting Arrow.Timestamp with precision = MICROSECOND to `DateTime` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)
└ @ Arrow /Users/zijin/.julia/packages/Arrow/5pHqZ/src/eltypes.jl:273
┌ Warning: automatically converting Arrow.Timestamp with precision = NANOSECOND to `DateTime` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)
└ @ Arrow /Users/zijin/.julia/packages/Arrow/5pHqZ/src/eltypes.jl:273
Code
subset_jno(df, r) = subset(df, :radial_distance => x -> round_c.(x) .== r)
subset_time(df, t) = subset(df, :time => x -> x .< t)
subset_time (generic function with 1 method)
Code
bin(x, bin_size) = @. round(x / bin_size) * bin_size
bin(bin_size) = x -> bin(x, bin_size)
bin (generic function with 2 methods)
Code
df = wind_df;
df_s1 = subset(df, :time => t -> year.(t) .== 2013);
df_s2 = subset(df_s1, :time => t -> month.(t) .< 2);

jno_df_s1 = subset(jno_df, :time => t -> year.(t) .== 2011);
jno_df_s2 = subset(jno_df, :time => t -> year.(t) .== 2016);
Code
using Distributions

get_fit_params(x; dist=Weibull, func=mean) = func(fit(dist, x))
get_wt_fit_params(time; kwargs...) = get_fit_params(waiting_time(time); kwargs...)
get_wt_fit_params (generic function with 1 method)
Code
jno_ρ_df = @chain jno_df begin
    @transform!(
        :time_bin = (:time - minimum(:time)) ./ time_interval .|> round,
        :l = :L_k .* (abs  cosd).(:θ_vn) , 
        :l_n = :L_k .* (abs  cos  atan).(:Vn_y ./ :Vn_x )
    )
end;
Code
time_interval = Day(160)

jno_ρ_df = @chain jno_df begin
    @transform(
        :time_bin = (:time - minimum(:time)) ./ time_interval .|> round,
        :l = :L_k .* (abs  cosd).(:θ_vn) , 
        :l_n = :L_k .* (abs  cos  atan).(:Vn_y ./ :Vn_x )
    )
    @transform(
        :l_ratio = :l ./ :r ,
        :l_n_ratio = :l_n ./ :r
    )
    groupby(:time_bin)
    combine(
        nrow,
        :time => get_wt_fit_params => :τ,
        [:r :l :l_ratio :l_n_ratio] .=> mean
    )
    # subset(:nrow => x -> x .> 2000)
    @transform(
        :ρ = 1 ./ :τ,
    )
    @transform(
        :ρ_l = :ρ ./ :l_ratio_mean * maximum(:l_ratio_mean),
    )
    sort(:r_mean)
end
12×9 DataFrame
Row time_bin nrow τ r_mean l_mean l_ratio_mean l_n_ratio_mean ρ ρ_l
Float64 Int64 Float64 Float64 Float64 Float64 Float64 Float64 Float64
1 0.0 4590 25.0674 1.13503 975.866 858.723 1017.08 0.0398924 0.0398924
2 5.0 4994 28.4264 1.35794 969.428 730.797 894.068 0.0351785 0.0413366
3 4.0 3457 30.4534 1.61512 1170.85 724.87 862.256 0.032837 0.0389007
4 1.0 8532 26.5818 1.73547 1181.37 686.237 811.427 0.0376198 0.0470756
5 3.0 5424 38.6809 2.05865 1292.96 625.945 726.419 0.0258525 0.0354666
6 2.0 3775 43.9766 2.17965 1422.86 652.024 769.166 0.0227394 0.029948
7 6.0 5152 40.4849 2.39058 1424.45 606.314 712.156 0.0247006 0.0349834
8 7.0 2767 72.0427 3.61141 1483.87 410.034 476.189 0.0138807 0.0290699
9 8.0 2187 97.5659 4.37364 1545.91 354.351 423.781 0.0102495 0.0248383
10 9.0 1382 151.467 4.92611 2035.49 412.162 471.483 0.0066021 0.0137552
11 10.0 1394 148.241 5.26001 1939.71 369.062 416.874 0.00674575 0.0156958
12 11.0 1043 116.979 5.39234 2225.85 412.717 461.778 0.00854852 0.0177865
Code
wind_ρ_df = @chain wind_df begin
    @transform!(
        :time_bin = (:time - minimum(:time)) ./ time_interval .|> round,
    )
    groupby(:time_bin)
    combine(
        nrow,
        :time => get_wt_fit_params => :τ,
    )
    @transform!(
        :ρ = 1 ./ :τ,
    )
end
12×4 DataFrame
Row time_bin nrow τ ρ
Float64 Int64 Float64 Float64
1 0.0 4129 27.9398 0.0357912
2 1.0 8625 26.6836 0.0374762
3 2.0 8002 28.8769 0.0346297
4 3.0 9325 24.7558 0.0403946
5 4.0 8248 27.9737 0.0357479
6 5.0 8721 26.4539 0.0378016
7 6.0 8573 26.9517 0.0371034
8 7.0 6834 28.9811 0.0345052
9 8.0 8451 27.2753 0.0366633
10 9.0 8226 28.0696 0.0356257
11 10.0 8248 27.9723 0.0357496
12 11.0 4783 27.2311 0.0367227
Code
df = leftjoin(jno_ρ_df, wind_ρ_df, on = :time_bin, makeunique=true)
df.ρ_ratio = @. df.ρ / df.ρ_1

@transform!(df, 
    :ratio = :ρ_ratio ./ :l_ratio_mean,
    :ratio_n = :ρ_ratio ./ :l_n_ratio_mean,
)
@transform!(
    df, 
    :ratio = :ratio ./ mean(:ratio),
    :ratio_n = :ratio_n ./ mean(:ratio_n)
)

df
12×15 DataFrame
Row time_bin nrow τ r_mean l_mean l_ratio_mean l_n_ratio_mean ρ ρ_l nrow_1 τ_1 ρ_1 ρ_ratio ratio ratio_n
Float64 Int64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Int64? Float64? Float64? Float64 Float64 Float64
1 0.0 4590 25.0674 1.13503 975.866 858.723 1017.08 0.0398924 0.0398924 4129 27.9398 0.0357912 1.11459 1.32853 1.31993
2 1.0 8532 26.5818 1.73547 1181.37 686.237 811.427 0.0376198 0.0470756 8625 26.6836 0.0374762 1.00383 1.49726 1.49006
3 2.0 3775 43.9766 2.17965 1422.86 652.024 769.166 0.0227394 0.029948 8002 28.8769 0.0346297 0.656644 1.0308 1.02826
4 3.0 5424 38.6809 2.05865 1292.96 625.945 726.419 0.0258525 0.0354666 9325 24.7558 0.0403946 0.64 1.04653 1.06117
5 4.0 3457 30.4534 1.61512 1170.85 724.87 862.256 0.032837 0.0389007 8248 27.9737 0.0357479 0.918573 1.29707 1.28313
6 5.0 4994 28.4264 1.35794 969.428 730.797 894.068 0.0351785 0.0413366 8721 26.4539 0.0378016 0.93061 1.30341 1.25369
7 6.0 5152 40.4849 2.39058 1424.45 606.314 712.156 0.0247006 0.0349834 8573 26.9517 0.0371034 0.665723 1.12384 1.12593
8 7.0 2767 72.0427 3.61141 1483.87 410.034 476.189 0.0138807 0.0290699 6834 28.9811 0.0345052 0.402277 1.00419 1.01751
9 8.0 2187 97.5659 4.37364 1545.91 354.351 423.781 0.0102495 0.0248383 8451 27.2753 0.0366633 0.279557 0.807508 0.794551
10 9.0 1382 151.467 4.92611 2035.49 412.162 471.483 0.0066021 0.0137552 8226 28.0696 0.0356257 0.185318 0.460214 0.473418
11 10.0 1394 148.241 5.26001 1939.71 369.062 416.874 0.00674575 0.0156958 8248 27.9723 0.0357496 0.188694 0.523323 0.545189
12 11.0 1043 116.979 5.39234 2225.85 412.717 461.778 0.00854852 0.0177865 4783 27.2311 0.0367227 0.232786 0.577317 0.607178
Code
x = :time_bin => "Time"
legends = ["Juno", "Wind"]
specs = data(df) * mapping(x, [:ρ_l, :ρ_1] .=> "Normalized Occurence rates") * mapping(color=dims(1) => renamer(legends)) 
draw(specs)
easy_save("ocr/ocr_normalized_by_r")
┌ Info: Saved /Users/zijin/projects/ids_spatial_evolution_juno/figures/ocr/ocr_normalized_by_r.svg
└ @ Beforerr /Users/zijin/.julia/dev/Beforerr/src/utils/makie.jl:48
Code
x = :r_mean => r_lab
specs = data(df) * mapping(x, :ρ)
draw(specs)
Code
y = [:ratio_n, :ratio]
specs = data(df) * mapping(x) * mapping(y, color=dims(1) => renamer(string.(y))) 
plt2 = draw(specs)
Code
function plot_wt_pdf_lim(df; xlims = (1, 1000), ylims = (1e-5, 8e-2), kwargs...)
    f = plot_wt_pdf(df; kwargs...)
    xlims!(xlims...)
    ylims!(ylims...)
    return f
end

using Printf
Base.show(io::IO, f::Float64) = @printf(io, "%.2f", f)
Code
dfs = [ jno_df, jno_df_s1, jno_df_s2 ]
fgs_e = plot_wt_pdf_lim.(dfs, dist=Exponential)
fgs_w = plot_wt_pdf_lim.(dfs, dist=Weibull)
display.(fgs_e);
display.(fgs_w);

Compare the exponential and Weibull fits for the waiting time distributions of Wind dataset

Code
f1 = plot_wt_pdf_lim(wind_df, ylims=(1e-6, 1e-1),)
f2 = plot_wt_pdf_lim(wind_df, dist=Weibull, ylims=(1e-6, 1e-1))
display.([f1, f2]);
Code
df1 = subset_time(jno_df, Date(2011, 9, 30))
df2 = subset_time(wind_df, Date(2011, 9, 30))
dfs = [ df1, df2 ]
fgs = dfs .|> plot_wt_pdf_lim 
display.(fgs);