Time series correlation

Code
import pyspedas
from pytplot import get_data
from pyspedas.analysis.tvectot import tvectot  # magnitude (total) of a vector tplot variable

import xarray as xr
import polars as pl
import polars.selectors as cs

import hvplot.polars  # noqa

import scipy.stats
import dcor  # distance correlation

import astropy.units as u
Code
from datetimerange import DateTimeRange
from sunpy.time import TimeRange
from numpy import timedelta64
from datetime import timedelta
Code
start = "2019-04-06T12:00"
end = "2019-04-07T12:00"

earth_start = "2019-04-09"
earth_end = "2019-04-14"
Code
psp_timerange = TimeRange(start, end)

timerange_earth = TimeRange(earth_start, earth_end)
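For orientation, a quick check of the two intervals (a minimal sketch; printing a sunpy `TimeRange` shows its start, end, and duration):

Code
print(psp_timerange)    # the 24 h interval observed at PSP
print(timerange_earth)  # the 5 d search window at 1 AU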
Code
def validate(timerange):
    """Convert a DateTimeRange or sunpy TimeRange into the [start, end] string pair expected by pyspedas."""
    if isinstance(timerange, DateTimeRange):
        return [timerange.get_start_time_str(), timerange.get_end_time_str()]
    if isinstance(timerange, TimeRange):
        return [timerange.start.to_string(), timerange.end.to_string()]
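
A quick usage check (a sketch; the exact string format follows whatever the underlying time objects produce):

Code
validate(psp_timerange)
# -> a [start, end] pair of ISO-formatted strings accepted by pyspedas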

Estimate of arrival time

Code
# PSP SWEAP/SPI ion moments (L3)
psp_spi_vars = pyspedas.psp.spi(trange=validate(psp_timerange), time_clip=True)

tvar = tvectot("psp_spi_VEL_RTN_SUN")  # magnitude of the RTN velocity (returns the new tplot variable name)
psp_ion_speed: xr.DataArray = get_data(tvar, xarray=True)

r_psp = get_data("psp_spi_SUN_DIST", xarray=True)  # radial distance from the Sun (km)
# r_psp = pl.DataFrame(r_psp.to_dataframe().reset_index())
Code
# Join radial distance and ion speed on the shared time index into a single table
df_info = pl.DataFrame(
    r_psp.to_dataframe().join(psp_ion_speed.to_dataframe()).reset_index()
)
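The next cell turns each hourly averaged speed into an expected arrival time at 1 AU by simple ballistic propagation, dt = (1 AU - r_PSP) / v_sw, bracketed by assumed slow (250 km/s) and fast (500 km/s) solar wind speeds. A quick back-of-the-envelope check with illustrative numbers (not taken from the data):

Code
# Sketch: a parcel seen at 0.17 AU moving radially at 350 km/s (assumed values)
# needs dt = (1 AU - 0.17 AU) / 350 km/s ≈ 4.1 days to reach 1 AU.
import astropy.units as u

r_obs = 0.17 * u.AU
v_obs = 350 * u.km / u.s
dt = ((1 * u.AU - r_obs) / v_obs).to(u.day)
print(dt)  # ~4.1 d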
Code
km2au = u.km.to(u.AU)  # conversion factor: km -> AU
r_earth = 1 * u.AU.to(u.km)  # 1 AU expressed in km
v_sw_slow = 250 * u.km / u.s  # assumed lower bound on solar wind speed
v_sw_fast = 500 * u.km / u.s  # assumed upper bound on solar wind speed

# Hourly averages, then ballistic propagation: dt = (1 AU - r_PSP) / v_sw
df_info.sort("time").group_by_dynamic("time", every="1h").agg(
    cs.float().mean()
).with_columns(
    distance2sun=pl.col("psp_spi_SUN_DIST"),
    distance2earth=r_earth - pl.col("psp_spi_SUN_DIST"),
).with_columns(
    dt2arrival=pl.duration(
        seconds=pl.col("distance2earth") / pl.col("psp_spi_VEL_RTN_SUN_tot")
    ),
    dt2arrival_slow=pl.duration(seconds=pl.col("distance2earth") / v_sw_slow),
    dt2arrival_fast=pl.duration(seconds=pl.col("distance2earth") / v_sw_fast),
).with_columns(
    time2arrival=pl.col("time") + pl.col("dt2arrival"),
    time2arrival_slow=pl.col("time") + pl.col("dt2arrival_slow"),
    time2arrival_fast=pl.col("time") + pl.col("dt2arrival_fast"),
).with_columns(
    # express distances in AU for readability
    distance2sun=pl.col("distance2sun") * km2au,
    distance2earth=pl.col("distance2earth") * km2au,
)
shape: (24, 11)
time psp_spi_SUN_DIST psp_spi_VEL_RTN_SUN_tot distance2sun distance2earth dt2arrival dt2arrival_slow dt2arrival_fast time2arrival time2arrival_slow time2arrival_fast
datetime[ns] f64 f32 f64 f64 duration[μs] duration[μs] duration[μs] datetime[μs] datetime[μs] datetime[μs]
2019-04-06 12:00:00 2.6162e7 289.611786 0.174885 0.825115 4d 22h 23m 30s 5d 17h 9m 1s 2d 20h 34m 30s 2019-04-11 10:23:30 2019-04-12 05:09:01 2019-04-09 08:34:30
2019-04-06 13:00:00 2.6231e7 273.515167 0.175347 0.824653 5d 5h 17m 20s 5d 17h 4m 25s 2d 20h 32m 12s 2019-04-11 18:17:20 2019-04-12 06:04:25 2019-04-09 09:32:12
2019-04-06 14:00:00 2.6302e7 255.006729 0.175819 0.824181 5d 14h 18m 20s 5d 16h 59m 43s 2d 20h 29m 51s 2019-04-12 04:18:20 2019-04-12 06:59:43 2019-04-09 10:29:51
2019-04-06 15:00:00 2.6374e7 243.91748 0.1763 0.8237 5d 20h 19m 46s 5d 16h 54m 55s 2d 20h 27m 27s 2019-04-12 11:19:46 2019-04-12 07:54:55 2019-04-09 11:27:27
2019-04-06 16:00:00 2.6448e7 244.805847 0.176796 0.823204 5d 19h 44m 9s 5d 16h 49m 58s 2d 20h 24m 59s 2019-04-12 11:44:09 2019-04-12 08:49:58 2019-04-09 12:24:59
2019-04-06 17:00:00 2.6523e7 232.988617 0.177294 0.822706 6d 2h 44m 4s 5d 16h 45m 2d 20h 22m 30s 2019-04-12 19:44:04 2019-04-12 09:45:00 2019-04-09 13:22:30
2019-04-06 18:00:00 2.6599e7 224.188614 0.177805 0.822195 6d 8h 23m 58s 5d 16h 39m 54s 2d 20h 19m 57s 2019-04-13 02:23:58 2019-04-12 10:39:54 2019-04-09 14:19:57
2019-04-06 19:00:00 2.6677e7 222.26709 0.178326 0.821674 6d 9h 37m 11s 5d 16h 34m 42s 2d 20h 17m 21s 2019-04-13 04:37:11 2019-04-12 11:34:42 2019-04-09 15:17:21
2019-04-06 20:00:00 2.6757e7 234.130203 0.178857 0.821143 6d 1h 44m 30s 5d 16h 29m 25s 2d 20h 14m 42s 2019-04-12 21:44:30 2019-04-12 12:29:25 2019-04-09 16:14:42
2019-04-06 21:00:00 2.6837e7 237.631592 0.179396 0.820604 5d 23h 30m 5d 16h 24m 2s 2d 20h 12m 1s 2019-04-12 20:30:00 2019-04-12 13:24:02 2019-04-09 17:12:01
2019-04-06 22:00:00 2.6919e7 307.80896 0.179945 0.820055 4d 14h 42m 34s 5d 16h 18m 34s 2d 20h 9m 17s 2019-04-11 12:42:34 2019-04-12 14:18:34 2019-04-09 18:09:17
2019-04-06 23:00:00 2.7003e7 344.912201 0.180502 0.819498 4d 2h 43m 58s 5d 16h 13m 2d 20h 6m 30s 2019-04-11 01:43:58 2019-04-12 15:13:00 2019-04-09 19:06:30
2019-04-07 00:00:00 2.7087e7 355.850494 0.181069 0.818931 3d 23h 37m 54s 5d 16h 7m 21s 2d 20h 3m 40s 2019-04-10 23:37:54 2019-04-12 16:07:21 2019-04-09 20:03:40
2019-04-07 01:00:00 2.7174e7 392.575317 0.181644 0.818356 3d 14h 37m 29s 5d 16h 1m 37s 2d 20h 48s 2019-04-10 15:37:29 2019-04-12 17:01:37 2019-04-09 21:00:48
2019-04-07 02:00:00 2.7261e7 362.412781 0.182228 0.817772 3d 21h 46m 2s 5d 15h 55m 47s 2d 19h 57m 53s 2019-04-10 23:46:02 2019-04-12 17:55:47 2019-04-09 21:57:53
2019-04-07 03:00:00 2.7349e7 379.650299 0.182819 0.817181 3d 17h 26m 42s 5d 15h 49m 53s 2d 19h 54m 56s 2019-04-10 20:26:42 2019-04-12 18:49:53 2019-04-09 22:54:56
2019-04-07 04:00:00 2.7440e7 395.427307 0.183425 0.816575 3d 13h 48m 46s 5d 15h 43m 51s 2d 19h 51m 55s 2019-04-10 17:48:46 2019-04-12 19:43:51 2019-04-09 23:51:55
2019-04-07 05:00:00 2.7530e7 390.717957 0.184029 0.815971 3d 14h 46m 58s 5d 15h 37m 49s 2d 19h 48m 54s 2019-04-10 19:46:58 2019-04-12 20:37:49 2019-04-10 00:48:54
2019-04-07 06:00:00 2.7623e7 403.930115 0.184646 0.815354 3d 11h 52m 51s 5d 15h 31m 40s 2d 19h 45m 50s 2019-04-10 17:52:51 2019-04-12 21:31:40 2019-04-10 01:45:50
2019-04-07 07:00:00 2.7716e7 413.059479 0.185271 0.814729 3d 9h 57m 50s 5d 15h 25m 26s 2d 19h 42m 43s 2019-04-10 16:57:50 2019-04-12 22:25:26 2019-04-10 02:42:43
2019-04-07 08:00:00 2.7811e7 388.398651 0.185904 0.814096 3d 15h 6m 2s 5d 15h 19m 8s 2d 19h 39m 34s 2019-04-10 23:06:02 2019-04-12 23:19:08 2019-04-10 03:39:34
2019-04-07 09:00:00 2.7907e7 369.630157 0.186544 0.813456 3d 19h 27m 4s 5d 15h 12m 45s 2d 19h 36m 22s 2019-04-11 04:27:04 2019-04-13 00:12:45 2019-04-10 04:36:22
2019-04-07 10:00:00 2.8003e7 330.248749 0.187192 0.812808 4d 6h 16m 30s 5d 15h 6m 17s 2d 19h 33m 8s 2019-04-11 16:16:30 2019-04-13 01:06:17 2019-04-10 05:33:08
2019-04-07 11:00:00 2.8102e7 299.601105 0.187847 0.812153 4d 16h 38m 47s 5d 14h 59m 45s 2d 19h 29m 52s 2019-04-12 03:38:47 2019-04-13 01:59:45 2019-04-10 06:29:52
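
Read off the table, the fast and slow bounds bracket the arrival at 1 AU between roughly 2019-04-09 and 2019-04-13, which is what motivates the Earth-side window of 2019-04-09 to 2019-04-14 chosen above. If the chained expression is assigned to a variable (say `df_arrival`, a hypothetical name), the bounds can be pulled out directly:

Code
# Sketch, assuming the result above is stored as `df_arrival`
df_arrival.select(
    pl.col("time2arrival_fast").min(),  # earliest plausible arrival
    pl.col("time2arrival_slow").max(),  # latest plausible arrival
)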

Get velocity

Code
# Reload PSP SPI ion moments and load ACE SWEPAM (h0, 64 s) moments for the Earth-side window
psp_spi_vars = pyspedas.psp.spi(trange=validate(psp_timerange), time_clip=True)
swe_vars = pyspedas.ace.swe(trange=validate(timerange_earth), datatype="h0")
29-Jan-24 14:35:31: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/psp/sweap/spi/l3/spi_sf00_l3_mom/2019/
Using LEVEL=L3
29-Jan-24 14:35:31: File is current: /Users/zijin/data/psp/sweap/spi/l3/spi_sf00_l3_mom/2019/psp_swp_spi_sf00_l3_mom_20190406_v04.cdf
29-Jan-24 14:35:31: File is current: /Users/zijin/data/psp/sweap/spi/l3/spi_sf00_l3_mom/2019/psp_swp_spi_sf00_l3_mom_20190407_v04.cdf
29-Jan-24 14:35:32: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/ace/swepam/level_2_cdaweb/swe_h0/2019/
29-Jan-24 14:35:32: File is current: /Users/zijin/data/ace/swepam/level_2_cdaweb/swe_h0/2019/ac_h0_swe_20190409_v11.cdf
29-Jan-24 14:35:33: File is current: /Users/zijin/data/ace/swepam/level_2_cdaweb/swe_h0/2019/ac_h0_swe_20190410_v11.cdf
29-Jan-24 14:35:33: File is current: /Users/zijin/data/ace/swepam/level_2_cdaweb/swe_h0/2019/ac_h0_swe_20190411_v11.cdf
29-Jan-24 14:35:33: File is current: /Users/zijin/data/ace/swepam/level_2_cdaweb/swe_h0/2019/ac_h0_swe_20190412_v11.cdf
29-Jan-24 14:35:33: File is current: /Users/zijin/data/ace/swepam/level_2_cdaweb/swe_h0/2019/ac_h0_swe_20190413_v11.cdf
Code
tvar = tvectot("psp_spi_VEL_RTN_SUN")  # ion speed magnitude at PSP
psp_ion_speed: xr.DataArray = get_data(tvar, xarray=True)

ace_ion_speed: xr.DataArray = get_data("Vp", xarray=True)  # solar wind proton speed at ACE
Code
def calc_correlation(
    v1: xr.DataArray,
    v2: xr.DataArray,
    v1_timerange: TimeRange,
    v2_timerange: TimeRange,
    cadence: timedelta = timedelta(minutes=10),
    window: timedelta = timedelta(hours=1),
):
    """Slide a window over `v2` and correlate it against the fixed `v1` interval.

    For every candidate window of `v2` (stepped by `cadence`), the window is
    shifted back in time so that it starts at `v1_timerange.start`, `v1` is
    interpolated onto its timestamps, and the Pearson and distance correlations
    are computed.
    """
    v1 = v1.dropna("time").sel(
        time=slice(v1_timerange.start.to_datetime(), v1_timerange.end.to_datetime())
    )
    v2 = v2.dropna("time").sel(
        time=slice(v2_timerange.start.to_datetime(), v2_timerange.end.to_datetime())
    )
    v1_start = v1_timerange.start.to_datetime()
    v1_end = v1_timerange.end.to_datetime()

    results = []
    for temp_tr in v2_timerange.window(cadence=cadence, window=window):
        v2_temp_start = temp_tr.start.to_datetime()
        v2_temp_end = temp_tr.end.to_datetime()

        v2_temp = v2.sel(time=slice(v2_temp_start, v2_temp_end))
        # shift the candidate window back so it aligns with the v1 interval
        v2_temp["time"] = v2_temp["time"] - timedelta64(v2_temp_start - v1_start)
        v1_temp = v1.interp_like(v2_temp)

        pearsonr = scipy.stats.pearsonr(v1_temp, v2_temp).statistic
        distance_correlation = dcor.distance_correlation(v1_temp, v2_temp)
        results.append(
            [
                v1_start,
                v1_end,
                v2_temp_start,
                v2_temp_end,
                pearsonr,
                distance_correlation,
            ]
        )

    return pl.DataFrame(
        results,
        schema=[
            "v1_start",
            "v1_end",
            "v2_start",
            "v2_end",
            "Pearson correlation",
            "Distance correlation",
        ],
        orient="row",
    )
29-Jan-24 14:35:35: /Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/dcor/_fast_dcov_avl.py:554: UserWarning: Falling back to uncompiled AVL fast distance covariance terms because of TypeError exception raised: No matching definition for argument type(s) array(float64, 1d, C), array(float32, 1d, C), bool. Rembember: only floating point values can be used in the compiled implementations.
  warnings.warn(
Code
window = psp_timerange.end - psp_timerange.start  # compare against the full 24 h PSP interval
cadence = timedelta(minutes=10)  # step the window across the Earth-side interval every 10 minutes

df = calc_correlation(
    psp_ion_speed,
    ace_ion_speed,
    psp_timerange,
    timerange_earth,
    cadence=cadence,
    window=window,
)
Code
df
shape: (578, 6)
v1_start v1_end v2_start v2_end Pearson correlation Distance correlation
datetime[μs] datetime[μs] datetime[μs] datetime[μs] f64 f64
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 00:00:00 2019-04-10 00:00:00 0.129733 0.295187
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 00:10:00 2019-04-10 00:10:00 0.115776 0.290461
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 00:20:00 2019-04-10 00:20:00 0.125829 0.295799
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 00:30:00 2019-04-10 00:30:00 0.134056 0.299338
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 00:40:00 2019-04-10 00:40:00 0.141806 0.309491
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 00:50:00 2019-04-10 00:50:00 0.137222 0.310114
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 01:00:00 2019-04-10 01:00:00 0.130128 0.307473
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 01:10:00 2019-04-10 01:10:00 0.132468 0.314047
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 01:20:00 2019-04-10 01:20:00 0.137969 0.310688
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 01:30:00 2019-04-10 01:30:00 0.153246 0.316048
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 01:40:00 2019-04-10 01:40:00 0.169566 0.320866
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-09 01:50:00 2019-04-10 01:50:00 0.167484 0.325138
…
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 22:20:00 2019-04-13 22:20:00 -0.75871 0.797919
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 22:30:00 2019-04-13 22:30:00 -0.769157 0.807681
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 22:40:00 2019-04-13 22:40:00 -0.772618 0.81499
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 22:50:00 2019-04-13 22:50:00 -0.78226 0.825125
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 23:00:00 2019-04-13 23:00:00 -0.788294 0.830554
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 23:10:00 2019-04-13 23:10:00 -0.783344 0.829585
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 23:20:00 2019-04-13 23:20:00 -0.788571 0.834751
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 23:30:00 2019-04-13 23:30:00 -0.787044 0.837541
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 23:40:00 2019-04-13 23:40:00 -0.788278 0.841798
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-12 23:50:00 2019-04-13 23:50:00 -0.785141 0.839557
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-13 00:00:00 2019-04-14 00:00:00 -0.777335 0.835922
2019-04-06 12:00:00 2019-04-07 12:00:00 2019-04-13 00:10:00 2019-04-14 00:10:00 -0.779654 0.840022
Code
df.hvplot("v2_start", ["Pearson correlation", "Distance correlation"])