Duration

There might be multiple ways to define the duration of a discontinuity. Here are some possibilities:

Notes:

Caveats:

Maximum distance method


ts_max_distance

 ts_max_distance (ts:xarray.core.dataarray.DataArray, coord:str='time')

Compute the time interval when the timeseries has maximum cumulative variation

test for ts_max_distance function
# Ten time stamps whose samples trace a half circle (sin, cos) in the first two
# of three components; the third component stays zero. The two end points are
# diametrically opposite, so the largest separation is between the first and
# the last sample.
time = pd.date_range("2000-01-01", periods=10)
x = np.linspace(0, np.pi, len(time))
data = np.column_stack([np.sin(x), np.cos(x), np.zeros_like(x)])
ts = xr.DataArray(data, coords={"time": time}, dims=["time", "space"])

# The interval reported by ts_max_distance must span the whole series.
start, end = ts_max_distance(ts)
assert start == time[0]
assert end == time[-1]

Maximum derivative method


get_time_from_condition

 get_time_from_condition (vec:xarray.core.dataarray.DataArray, threshold,
                          condition_type)

find_start_end_times

 find_start_end_times (vec_diff_mag:xarray.core.dataarray.DataArray,
                       d_time, threshold)

ts_max_derivative

 ts_max_derivative (vec:xarray.core.dataarray.DataArray,
                    threshold_ratio=0.25)

calc_duration

 calc_duration (ts:xarray.core.dataarray.DataArray,
                method:Literal['distance','derivative']='distance',
                **kwargs)
Code
def calc_d_duration(vec: xr.DataArray, d_time, threshold) -> pd.Series:
    """Locate the start and end times of an event from the rate of change of `vec`.

    The time derivative of `vec` (per second) is reduced to its norm over the
    "v_dim" dimension, and that magnitude is handed to `find_start_end_times`
    together with `d_time` and `threshold`.

    Parameters
    ----------
    vec : xr.DataArray
        Input with a "time" coordinate and a "v_dim" dimension.
    d_time
        Passed through unchanged to `find_start_end_times`.
    threshold
        Passed through unchanged to `find_start_end_times`.

    Returns
    -------
    pd.Series
        Two entries: "t_us" (first value returned by `find_start_end_times`)
        and "t_ds" (second value).
    """
    rate_of_change_mag = linalg.norm(
        vec.differentiate("time", datetime_unit="s"), dims="v_dim"
    )
    t_us, t_ds = find_start_end_times(rate_of_change_mag, d_time, threshold)
    return pd.Series({"t_us": t_us, "t_ds": t_ds})

Calibrates candidate duration

This calibration is based on the assumption that the magnetic discontinuity is symmetric around the center of time, which is not always true.

So instead of calibrating the duration, we drop the events.

- Cons: Might influence the statistics of occurrence rate.
- Pros: More robust results about the properties of the magnetic discontinuity.

Code
# def calibrate_candidate_duration(
#     candidate: pd.Series, data:xr.DataArray, data_resolution, ratio = 3/4
# ):
#     """
#     Calibrates the candidate duration.
#     - If only one of 't_us' or 't_ds' is provided, calculates the missing one based on the provided one and 'd_time'.
#     - Then if there are not enough points between 't_us' and 't_ds', returns None for both.


#     Parameters
#     ----------
#     - candidate (pd.Series): The input candidate with potential missing 't_us' or 't_ds'.

#     Returns
#     -------
#     - pd.Series: The calibrated candidate.
#     """

#     start_notnull = pd.notnull(candidate['t_us'])
#     stop_notnull = pd.notnull(candidate['t_ds'])

#     match start_notnull, stop_notnull:
#         case (True, True):
#             t_us = candidate['t_us']
#             t_ds = candidate['t_ds']
#         case (True, False):
#             t_us = candidate['t_us']
#             t_ds = candidate['d_time'] -  candidate['t_us'] + candidate['d_time']
#         case (False, True):
#             t_us = candidate['d_time'] -  candidate['t_ds'] + candidate['d_time']
#             t_ds = candidate['t_ds']
#         case (False, False):
#             return pandas.Series({
#                 't_us': None,
#                 't_ds': None,
#             })

#     duration = t_ds - t_us
#     num_of_points_between = data.time.sel(time=slice(t_us, t_ds)).count().item()

#     if num_of_points_between <= (duration/data_resolution) * ratio:
#         t_us = None
#         t_ds = None

#     return pandas.Series({
#         't_us': t_us,
#         't_ds': t_ds,
#     })
Code
# def calibrate_candidates_duration(candidates, sat_fgm, data_resolution):
#     # calibrate duration

#     calibrate_duration = pdp.ApplyToRows(
#         lambda candidate: calibrate_candidate_duration(
#             candidate, sat_fgm, data_resolution
#         ),
#         func_desc="calibrating duration parameters if needed",
#     )

#     temp_candidates = candidates.loc[
#         lambda df: df["t_us"].isnull() | df["t_ds"].isnull()
#     ]  # temp_candidates = candidates.query('t_us.isnull() | t_ds.isnull()') # not implemented in `modin`

#     if not temp_candidates.empty:
#         temp_candidates_updated = calibrate_duration(sat_fgm, data_resolution).apply(
#             temp_candidates
#         )
#         candidates.update(temp_candidates_updated)
#     return candidates