Time resolution effect (data)

Process Wind data

Code
%load_ext autoreload
%autoreload 2
Code
from space_analysis.utils.speasy import Variables
from discontinuitypy.datasets import IDsDataset
from discontinuitypy.utils.basic import resample

from datetime import timedelta

from discontinuitypy.missions import wind_mag_h4_rtn_meta, wind_plasma_k0_swe_meta
Code
# Nominal magnetic-field cadence: 11 samples per second (Wind MFI H4 RTN).
ts = timedelta(seconds=1) / 11
# Sliding-window length used for discontinuity detection.
tau = timedelta(minutes=1)
Code
timerange = ["2016-01-01", "2016-06-29"]
provider = "archive/local"
mag_meta = wind_mag_h4_rtn_meta
plasma_meta = wind_plasma_k0_swe_meta


def _make_vars(meta):
    # Build a Variables query sharing the notebook-wide time range and provider.
    return Variables(
        timerange=timerange,
        **meta.model_dump(),
        provider=provider,
    )


wind_mag_vars = _make_vars(mag_meta)
wind_plasma_vars = _make_vars(plasma_meta)
Code
# Materialize both variable sets as polars frames for the resampling study below.
wind_mag_data, wind_plasma_data = (
    wind_mag_vars.to_polars(),
    wind_plasma_vars.to_polars(),
)
Code
# NOTE: full sweep is [11, 5, 2, 1, 0.5] Hz; currently running a single cadence.
# for freq in [11, 5 , 2, 1, 0.5]:
for freq in [5]:
    # Effective sampling period for this cadence.
    ts = timedelta(seconds=1 / freq)

    # Downsample the magnetic field to the target cadence before detection.
    mag_resampled = wind_mag_data.pipe(resample, every=ts)

    dataset = IDsDataset(
        mag_data=mag_resampled,
        plasma_data=wind_plasma_data,
        tau=tau,
        ts=ts,
        mag_meta=mag_meta,
        plasma_meta=plasma_meta,
    )

    # Detect discontinuities, enrich them with plasma context, and persist.
    events = dataset.find_events(return_best_fit=False).update_events()
    wind_ids_dataset = events.export(
        f"data/ts_effect/events.Wind.ts_{1/freq:.2f}s_tau_60s.arrow"
    )
15-May-24 18:09:44: UserWarning: Ray execution environment not yet initialized. Initializing...
To remove this warning, run the following python code before doing dataframe operations:

    import ray
    ray.init()


2024-05-15 18:09:46,850 INFO worker.py:1724 -- Started a local Ray instance.
15-May-24 18:09:48: UserWarning: Distributing <class 'pandas.core.frame.DataFrame'> object. This may take some time.
(_deploy_ray_func pid=78514) 15-May-24 18:09:56: RuntimeWarning: overflow encountered in exp
(_deploy_ray_func pid=78514) 
(_deploy_ray_func pid=78514) 15-May-24 18:09:57: RuntimeWarning: overflow encountered in exp
(_deploy_ray_func pid=78514) 
(_deploy_ray_func pid=78514) 15-May-24 18:10:02: RuntimeWarning: overflow encountered in exp [repeated 11x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)
(_deploy_ray_func pid=78514)  [repeated 11x across cluster]
(_deploy_ray_func pid=78514) 15-May-24 18:10:07: RuntimeWarning: overflow encountered in exp [repeated 10x across cluster]
(_deploy_ray_func pid=78514)  [repeated 10x across cluster]
(_deploy_ray_func pid=78517) 15-May-24 18:10:13: RuntimeWarning: overflow encountered in exp [repeated 5x across cluster]
(_deploy_ray_func pid=78517)  [repeated 5x across cluster]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[19], line 15
      2 for freq in [5]:
      3     ts = timedelta(seconds=1 / freq)
      5     wind_ids_dataset = (
      6         IDsDataset(
      7             mag_data=wind_mag_data.pipe(resample, every=ts),
      8             plasma_data=wind_plasma_data,
      9             tau=tau,
     10             ts=ts,
     11             mag_meta=mag_meta,
     12             plasma_meta=plasma_meta,
     13         )
     14         .find_events(return_best_fit=False)
---> 15         .update_events()
     16         .export(f"data/ts_effect/events.Wind.ts_{1/freq:.2f}s_tau_60s.arrow")
     17     )

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/discontinuitypy/datasets.py:134, in IDsDataset.update_events(self, **kwargs)
    133 def update_events(self, **kwargs):
--> 134     return self.update_events_with_plasma_data(
    135         **kwargs
    136     ).update_events_with_temp_data(**kwargs)

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/discontinuitypy/datasets.py:148, in IDsDataset.update_events_with_plasma_data(self, **kwargs)
    140 if self.plasma_data is not None:
    141     df_combined = combine_features(
    142         self.events,
    143         self.plasma_data.collect(),
    144         plasma_meta=self.plasma_meta,
    145         **kwargs,
    146     )
--> 148     self.events = calc_combined_features(
    149         df_combined,
    150         plasma_meta=self.plasma_meta,
    151         **kwargs,
    152     )
    153 else:
    154     logger.info("Plasma data is not available.")

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/discontinuitypy/integration.py:285, in calc_combined_features(df, b_cols, detail, normal_cols, Vl_cols, Vn_cols, thickness_cols, current_cols, plasma_meta, **kwargs)
    281 vec_cols = plasma_meta.velocity_cols
    282 density_col = plasma_meta.density_col
    284 result = (
--> 285     result.pipe(vector_project_pl, vec_cols, Vl_cols, name="v_l")
    286     .pipe(vector_project_pl, vec_cols, Vn_cols, name="v_n")
    287     .pipe(vector_project_pl, vec_cols, normal_cols, name="v_k")
    288     .with_columns(
    289         pl.col("v_n").abs(),
    290         pl.col("v_k").abs(),
    291         # v_mn=(pl.col("plasma_speed") ** 2 - pl.col("v_l") ** 2).sqrt(),
    292     )
    293     .with_columns(
    294         L_k=pl.col("v_k") * pl.col("duration"),
    295         j0_k=pl.col("d_star")
    296         / pl.col(
    297             "v_k"
    298         ),  # TODO: d_star corresponding to dB/dt, which direction is not exactly perpendicular to the k direction
    299         # NOTE: n direction is not properly determined for MVA analysis
    300         # j0_mn=pl.col("d_star") / pl.col("v_mn"),
    301         # L_n=pl.col("v_n") * pl.col("duration"),
    302         # L_mn=pl.col("v_mn") * pl.col("duration"),
    303         # NOTE: the duration is not properly determined for `max distance` method
    304         # L_k=pl.col("v_k") * pl.col("duration"),
    305     )
    306     .pipe(compute_inertial_length)
    307     .pipe(compute_Alfven_speed, n=density_col, B="b_mag")
    308     .pipe(compute_Alfven_current)
    309     .with_columns(
    310         cs.by_name(current_cols) * J_FACTOR.value,
    311     )
    312     .with_columns(
    313         (cs.by_name(thickness_cols) / length_norm).name.suffix("_norm"),
    314         (cs.by_name(current_cols) / current_norm).name.suffix("_norm"),
    315         (cs.by_name(b_cols) / b_norm).name.suffix("_norm"),
    316     )
    317 )
    319 if detail:
    320     result = (
    321         result.pipe(
    322             vector_project_pl,
   (...)
    335         .pipe(calc_plasma_parameter_change, plasma_meta=plasma_meta)
    336     )

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/polars/dataframe/frame.py:5249, in DataFrame.pipe(self, function, *args, **kwargs)
   5184 def pipe(
   5185     self,
   5186     function: Callable[Concatenate[DataFrame, P], T],
   5187     *args: P.args,
   5188     **kwargs: P.kwargs,
   5189 ) -> T:
   5190     """
   5191     Offers a structured way to apply a sequence of user-defined functions (UDFs).
   5192 
   (...)
   5247     └─────┴─────┘
   5248     """
-> 5249     return function(self, *args, **kwargs)

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/discontinuitypy/integration.py:128, in vector_project_pl(df, v1_cols, v2_cols, name)
    127 def vector_project_pl(df: pl.DataFrame, v1_cols, v2_cols, name=None):
--> 128     v1 = df2ts(df, v1_cols).assign_coords(v_dim=["x", "y", "z"])
    129     v2 = df2ts(df, v2_cols).assign_coords(v_dim=["x", "y", "z"])
    130     result = vector_project(v1, v2, dim="v_dim")

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/xarray/core/common.py:621, in DataWithCoords.assign_coords(self, coords, **coords_kwargs)
    618 else:
    619     results = self._calc_assign_results(coords_combined)
--> 621 data.coords.update(results)
    622 return data

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/xarray/core/coordinates.py:566, in Coordinates.update(self, other)
    560 # special case for PandasMultiIndex: updating only its dimension coordinate
    561 # is still allowed but depreciated.
    562 # It is the only case where we need to actually drop coordinates here (multi-index levels)
    563 # TODO: remove when removing PandasMultiIndex's dimension coordinate.
    564 self._drop_coords(self._names - coords_to_align._names)
--> 566 self._update_coords(coords, indexes)

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/xarray/core/coordinates.py:842, in DataArrayCoordinates._update_coords(self, coords, indexes)
    840 coords_plus_data = coords.copy()
    841 coords_plus_data[_THIS_ARRAY] = self._data.variable
--> 842 dims = calculate_dimensions(coords_plus_data)
    843 if not set(dims) <= set(self.dims):
    844     raise ValueError(
    845         "cannot add coordinates with new dimensions to a DataArray"
    846     )

File ~/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/xarray/core/variable.py:3008, in calculate_dimensions(variables)
   3006             last_used[dim] = k
   3007         elif dims[dim] != size:
-> 3008             raise ValueError(
   3009                 f"conflicting sizes for dimension {dim!r}: "
   3010                 f"length {size} on {k!r} and length {dims[dim]} on {last_used!r}"
   3011             )
   3012 return dims

ValueError: conflicting sizes for dimension 'v_dim': length 50 on <this-array> and length 3 on {'time': 'time', 'v_dim': 'v_dim'}