Source code for metobs_toolkit.modeltimeseries

import logging
from typing import Union
import pandas as pd
import numpy as np

from matplotlib.pyplot import Axes

from metobs_toolkit.backend_collection.dev_collection import copy_doc
from metobs_toolkit.backend_collection.df_helpers import to_timedelta
from metobs_toolkit.xrconversions import modeltimeseries_to_xr
import metobs_toolkit.backend_collection.printing_collection as printing
from metobs_toolkit.obstypes import Obstype
from metobs_toolkit.plot_collection.general_functions import (
    create_axes,
    set_legend,
    set_xlabel,
    set_ylabel,
    set_title,
    create_categorical_color_map,
)
from metobs_toolkit.plot_collection.timeseries_plotting import add_lines_to_axes


from metobs_toolkit.backend_collection.loggingmodule import log_entry
from metobs_toolkit.backend_collection.dataframe_constructors import modeltimeseries_df

logger = logging.getLogger("<metobs_toolkit>")


[docs] class ModelTimeSeries: """Class for model-based timeseries at one location. Parameters ---------- site : object The site object representing the location. datarecords : np.ndarray Array of data records. timestamps : np.ndarray Array of timestamps corresponding to the data records. obstype : Obstype The observation type. datadtype : type, optional Data type for the records, by default np.float32. timezone : str, optional Timezone for the timestamps, by default "UTC". modelname : str, optional Name of the model, by default None. modelvariable : str, optional Name of the model variable, by default None. """
[docs] def __init__( self, site, datarecords: np.ndarray, timestamps: np.ndarray, obstype: Obstype, datadtype: type = np.float32, timezone: str = "UTC", modelname: str = None, modelvariable: str = None, ): self.site = site self.obstype = obstype # Data data = pd.Series( data=pd.to_numeric(datarecords, errors="coerce").astype(datadtype), index=pd.DatetimeIndex(data=timestamps, tz=timezone, name="datetime"), name=obstype.name, ) self.series = data # model metadata self.modelname = modelname # TODO: is modelvariable not band_unit from the obstype? self.modelvariable = modelvariable
# ------------------------------------------ # Specials # ------------------------------------------ def __repr__(self): return f"<ModelTimeSeries(id={self._id()},obstype={self.obstype.name})>" def _id(self) -> str: """A physical unique id. In the __add__ methods, if the id of two instances differs, adding is a regular concatenation. """ return f"{self.site._id()}{self.modelname}_{self.modelvariable}" def __eq__(self, other) -> bool: """Check equality with another ModelTimeSeries object.""" if not isinstance(other, ModelTimeSeries): return False return ( self.site == other.site and self.obstype == other.obstype and self.series.equals(other.series) and self.modelname == other.modelname and self.modelvariable == other.modelvariable ) def __add__(self, other: "ModelTimeSeries") -> "ModelTimeSeries": """ Combine two ModelTimeSeries objects for the same site and obstype. The result contains all unique records, with preference to non-NaN values from 'other'. """ if not isinstance(other, ModelTimeSeries): raise TypeError("Can only add ModelTimeSeries to ModelTimeSeries.") if self._id() != other._id(): raise ValueError( f"Cannot add ModelTimeSeries for different ID's ({self._id()} != {other._id()})." ) # Align timezones if different if self.tz != other.tz: other_series = other.series.tz_convert(str(self.tz)) else: other_series = other.series # Combine the series, preferring non-NaN from 'other' combined_series = self.series.combine_first(other_series) combined = ModelTimeSeries( site=self.site, datarecords=combined_series.values, timestamps=combined_series.index.values, obstype=self.obstype + other.obstype, datadtype=combined_series.dtype, timezone=self.tz, modelname=self.modelname, modelvariable=self.modelvariable, ) return combined @copy_doc(modeltimeseries_df) @property def df(self) -> pd.DataFrame: return modeltimeseries_df(self) @property def stationname(self) -> str: """Return the name of the station this SensorData belongs to.""" return self.site.stationname @property def tz(self) -> str: """Return the timezone of the stored timestamps.""" return self.series.index.tz @property def start_datetime(self) -> pd.Timestamp: """Return the start datetime of the series.""" return self.series.index.min() @property def end_datetime(self) -> pd.Timestamp: """Return the end datetime of the series.""" return self.series.index.max() @property def freq(self) -> pd.Timedelta: """Return the frequency of the series.""" freq = pd.infer_freq(self.series.index) if freq is None: raise ValueError("Frequency could not be computed.") # note: sometimes 'h' is returned, and this gives issues, so add a 1 in front return to_timedelta(freq)
[docs] @copy_doc(modeltimeseries_to_xr) @log_entry def to_xr(self) -> "xarray.Dataset": return modeltimeseries_to_xr(self)
def _get_info_core(self, nident_root=1) -> dict: infostr = "" infostr += printing.print_fmt_line( f"Modelname {self.modelname} -> variable/band: {self.modelvariable}", nident_root, ) infostr += printing.print_fmt_line( f"From {self.start_datetime} --> {self.end_datetime}", nident_root ) infostr += printing.print_fmt_line( f"Assumed frequency: {self.freq}", nident_root ) infostr += printing.print_fmt_line( f"Number of records: {self.series.shape[0]}", nident_root ) infostr += printing.print_fmt_line( f"Units are converted from {self.obstype.model_unit} --> {self.obstype.std_unit}", nident_root, ) return infostr
[docs] @log_entry def get_info(self, printout: bool = True) -> Union[None, str]: """ Print or return information about the ModelTimeSeries. Parameters ---------- printout : bool, optional If True, print the information. If False, return as string. Default is True. Returns ------- None or str None if printout is True, otherwise the information string. """ logger.debug(f"{self.__class__.__name__}.get_info called for {self}") infostr = "" infostr += printing.print_fmt_title("General info of ModelTimeSeries") infostr += printing.print_fmt_line( f"{self.obstype.name} model data at location of {self.stationname}" ) infostr += self._get_info_core(nident_root=1) if printout: print(infostr) else: return infostr
[docs] @log_entry def make_plot( self, linecolor: str = None, ax: Union[Axes, None] = None, figkwargs: dict = {}, title: Union[str, None] = None, ) -> Axes: """ Create a plot of the model time series. Parameters ---------- linecolor : str, optional Color of the line, by default None. ax : Axes, optional Matplotlib Axes to plot on, by default None. figkwargs : dict, optional Additional keyword arguments for figure creation, by default {}. title : str or None, optional Title for the plot, by default None. Returns ------- Axes The matplotlib Axes with the plot. """ logger.debug(f"{self.__class__.__name__}.make_plot called for {self}") # define figure if ax is None: ax = create_axes(**figkwargs) # Define a color if linecolor is None: # create a new color color = create_categorical_color_map(["dummy"])["dummy"] else: color = linecolor legendname = f"{self.modelname}:{self.modelvariable}@{self.stationname}" ax = add_lines_to_axes( ax=ax, series=self.series, legend_label=legendname, linestyle="--", color=color, ) # Add Styling attributes # Set title: if title is None: set_title(ax, f"{self.obstype.name} data for station {self.stationname}") else: set_title(ax, title) # Set ylabel set_ylabel(ax, self.obstype._get_plot_y_label()) # Set xlabel set_xlabel(ax, f"Timestamps (in {self.tz})") # Add legend set_legend(ax) return ax