"""Define core operations for xarray objects.
TODO(shoyer): rewrite this module, making use of xarray.core.computation,
NumPy's __array_ufunc__ and mixin classes instead of the unintuitive "inject"
functions.
"""
import operator
import numpy as np
from . import dtypes, duck_array_ops
try:
import bottleneck as bn
has_bottleneck = True
except ImportError:
# use numpy methods instead
bn = np
has_bottleneck = False
# Names of the binary operators that also exist in an in-place form.
# NON_INPLACE_OP (below) is built from this list by looking up both
# ``operator.__i<name>__`` and ``operator.__<name>__`` for every entry.
NUM_BINARY_OPS = [
    "add",
    "sub",
    "mul",
    "truediv",
    "floordiv",
    "mod",
    "pow",
    "and",
    "xor",
    "or",
]

# methods which pass on the numpy return value unchanged
# be careful not to list methods that we would want to wrap later
# (consumed by inject_numpy_same, which wraps each name with
# _values_method_wrapper)
NUMPY_SAME_METHODS = ["item", "searchsorted"]

# methods which remove an axis
# (inject_reduce_methods resolves these as ``duck_array_ops.array_<name>`` and
# passes include_skipna=False)
REDUCE_METHODS = ["all", "any"]

# reductions that inject_reduce_methods resolves as ``duck_array_ops.<name>``
# and passes include_skipna=True for
NAN_REDUCE_METHODS = [
    "max",
    "min",
    "mean",
    "prod",
    "sum",
    "std",
    "var",
    "median",
]

# cumulative operations that inject_cum_methods resolves as
# ``duck_array_ops.<name>``
NAN_CUM_METHODS = ["cumsum", "cumprod"]
# TODO: wrap take, dot, sort
# Docstring template for the cumulative methods installed by
# inject_cum_methods. It is filled in with ``str.format`` using the keys
# ``name``, ``cls`` and ``extra_args``.
_CUM_DOCSTRING_TEMPLATE = """\
Apply `{name}` along some dimension of {cls}.
Parameters
----------
{extra_args}
skipna : bool, optional
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
keep_attrs : bool, optional
If True, the attributes (`attrs`) will be copied from the original
object to the new one. If False (default), the new object will be
returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `{name}`.
Returns
-------
cumvalue : {cls}
New {cls} object with `{name}` applied to its data along the
indicated dimension.
"""

# Docstring template for the reduction methods installed by
# inject_reduce_methods. It is filled in with ``str.format`` using the keys
# ``name``, ``cls``, ``extra_args``, ``skip_na_docs`` and ``min_count_docs``;
# the last two are either the snippets defined below or empty strings.
_REDUCE_DOCSTRING_TEMPLATE = """\
Reduce this {cls}'s data by applying `{name}` along some dimension(s).
Parameters
----------
{extra_args}{skip_na_docs}{min_count_docs}
keep_attrs : bool, optional
If True, the attributes (`attrs`) will be copied from the original
object to the new one. If False (default), the new object will be
returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to the appropriate array
function for calculating `{name}` on this object's data.
Returns
-------
reduced : {cls}
New {cls} object with `{name}` applied to its data and the
indicated dimension(s) removed.
"""

# Snippet substituted for ``{skip_na_docs}`` when a reduction is injected with
# include_skipna=True. It starts with a newline so it can be appended directly
# after ``{extra_args}``.
_SKIPNA_DOCSTRING = """
skipna : bool, optional
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64)."""

# Snippet substituted for ``{min_count_docs}`` when the reduction function has
# a truthy ``available_min_count`` attribute.
_MINCOUNT_DOCSTRING = """
min_count : int, default: None
The required number of valid values to perform the operation. If
fewer than min_count non-NA values are present the result will be
NA. Only used if skipna is set to True or defaults to True for the
array's dtype. New in version 0.10.8: Added with the default being
None. Changed in version 0.17.0: if specified on an integer array
and skipna=True, the result will be a float array."""
def fillna(data, other, join="left", dataset_join="left"):
    """Fill missing values in ``data`` with values taken from ``other``.

    Follows normal broadcasting and alignment rules. The work is delegated to
    ``apply_ufunc`` with ``duck_array_ops.fillna`` as the element-wise
    function.

    Parameters
    ----------
    data
        Object whose missing values are filled (first operand).
    other
        Object providing the replacement values (second operand).
    join : {"outer", "inner", "left", "right", "exact"}, optional
        Method for joining the indexes of the passed objects along each
        dimension

        - "outer": use the union of object indexes
        - "inner": use the intersection of object indexes
        - "left": use indexes from the first object with each dimension
        - "right": use indexes from the last object with each dimension
        - "exact": raise `ValueError` instead of aligning when indexes to be
          aligned are not equal
    dataset_join : {"outer", "inner", "left", "right"}, optional
        Method for joining variables of Dataset objects with mismatched
        data variables.

        - "outer": take variables from both Dataset objects
        - "inner": take only overlapped variables
        - "left": take only variables from the first object
        - "right": take only variables from the last object

    Returns
    -------
    The result of ``apply_ufunc``, which is called with ``keep_attrs=True``.
    """
    # Imported lazily inside the function rather than at module level.
    from .computation import apply_ufunc

    options = {
        "join": join,
        "dask": "allowed",
        "dataset_join": dataset_join,
        # Fill value handed to apply_ufunc for Dataset inputs.
        "dataset_fill_value": np.nan,
        "keep_attrs": True,
    }
    return apply_ufunc(duck_array_ops.fillna, data, other, **options)
def where_method(self, cond, other=dtypes.NA):
    """Select values from ``self`` or ``other`` according to ``cond``.

    Parameters
    ----------
    cond : DataArray or Dataset with boolean dtype
        Locations at which to preserve this object's values.
    other : scalar, DataArray or Dataset, optional
        Value to use for locations in this object where ``cond`` is False.
        By default, inserts missing values.

    Returns
    -------
    Same type as caller.
    """
    # Imported lazily inside the function rather than at module level.
    from .computation import apply_ufunc

    # alignment for three arguments is complicated, so don't support it yet:
    # with an explicit ``other`` the indexes must already match exactly.
    if other is dtypes.NA:
        join = "inner"
    else:
        join = "exact"

    # The same strategy is used for index alignment and for Dataset variables.
    return apply_ufunc(
        duck_array_ops.where_method,
        self,
        cond,
        other,
        join=join,
        dataset_join=join,
        dask="allowed",
        keep_attrs=True,
    )
def _call_possibly_missing_method(arg, name, args, kwargs):
try:
method = getattr(arg, name)
except AttributeError:
duck_array_ops.fail_on_dask_array_input(arg, func_name=name)
if hasattr(arg, "data"):
duck_array_ops.fail_on_dask_array_input(arg.data, func_name=name)
raise
else:
return method(*args, **kwargs)
def _values_method_wrapper(name):
def func(self, *args, **kwargs):
return _call_possibly_missing_method(self.data, name, args, kwargs)
func.__name__ = name
func.__doc__ = getattr(np.ndarray, name).__doc__
return func
def _method_wrapper(name):
def func(self, *args, **kwargs):
return _call_possibly_missing_method(self, name, args, kwargs)
func.__name__ = name
func.__doc__ = getattr(np.ndarray, name).__doc__
return func
def _func_slash_method_wrapper(f, name=None):
# try to wrap a method, but if not found use the function
# this is useful when patching in a function as both a DataArray and
# Dataset method
if name is None:
name = f.__name__
def func(self, *args, **kwargs):
try:
return getattr(self, name)(*args, **kwargs)
except AttributeError:
return f(self, *args, **kwargs)
func.__name__ = name
func.__doc__ = f.__doc__
return func
def inject_reduce_methods(cls):
    """Attach the reduction methods to ``cls``.

    Covers every name in ``REDUCE_METHODS`` and ``NAN_REDUCE_METHODS`` plus
    ``count``. Each method is produced by ``cls._reduce_method`` and receives
    a ``__name__`` and a docstring rendered from
    ``_REDUCE_DOCSTRING_TEMPLATE``.

    ``cls`` must provide ``_reduce_method`` and
    ``_reduce_extra_args_docstring``.
    """
    # (method name, implementing function, include_skipna)
    specs = [
        (name, getattr(duck_array_ops, f"array_{name}"), False)
        for name in REDUCE_METHODS
    ]
    specs.extend(
        (name, getattr(duck_array_ops, name), True) for name in NAN_REDUCE_METHODS
    )
    specs.append(("count", duck_array_ops.count, False))

    for name, reducer, with_skipna in specs:
        # Optional flags carried as attributes on the implementing function.
        numeric_only = getattr(reducer, "numeric_only", False)
        has_min_count = getattr(reducer, "available_min_count", False)

        injected = cls._reduce_method(reducer, with_skipna, numeric_only)
        injected.__name__ = name
        injected.__doc__ = _REDUCE_DOCSTRING_TEMPLATE.format(
            name=name,
            cls=cls.__name__,
            extra_args=cls._reduce_extra_args_docstring.format(name=name),
            skip_na_docs=_SKIPNA_DOCSTRING if with_skipna else "",
            min_count_docs=_MINCOUNT_DOCSTRING if has_min_count else "",
        )
        setattr(cls, name, injected)
def inject_cum_methods(cls):
    """Attach the cumulative methods named in ``NAN_CUM_METHODS`` to ``cls``.

    Each method is produced by ``cls._reduce_method`` with
    ``include_skipna=True`` and receives a ``__name__`` and a docstring
    rendered from ``_CUM_DOCSTRING_TEMPLATE``.

    ``cls`` must provide ``_reduce_method`` and ``_cum_extra_args_docstring``.
    """
    # Resolve every implementation up front so that a missing one fails before
    # anything is attached to ``cls``.
    resolved = [(name, getattr(duck_array_ops, name)) for name in NAN_CUM_METHODS]

    for name, cum_func in resolved:
        numeric_only = getattr(cum_func, "numeric_only", False)
        injected = cls._reduce_method(cum_func, True, numeric_only)
        injected.__name__ = name
        extra_args = cls._cum_extra_args_docstring.format(name=name)
        injected.__doc__ = _CUM_DOCSTRING_TEMPLATE.format(
            name=name, cls=cls.__name__, extra_args=extra_args
        )
        setattr(cls, name, injected)
def op_str(name):
    """Return the dunder form of an operator name, e.g. ``"add"`` -> ``"__add__"``."""
    return f"__{name}__"
def get_op(name):
    """Return the function from the ``operator`` module for operator ``name``.

    ``name`` is given without underscores (e.g. ``"add"`` or ``"iadd"``).
    Raises ``AttributeError`` if ``operator`` has no such dunder function.
    """
    dunder = op_str(name)
    return getattr(operator, dunder)
# Lookup table from each in-place operator function (``operator.__iadd__``,
# ...) to its non-in-place counterpart (``operator.__add__``, ...), with one
# entry per name in NUM_BINARY_OPS.
NON_INPLACE_OP = {get_op("i" + name): get_op(name) for name in NUM_BINARY_OPS}
def inplace_to_noninplace_op(f):
    """Return the non-in-place operator function that corresponds to ``f``.

    ``f`` must be one of the in-place functions used as keys of
    ``NON_INPLACE_OP``; any other value raises ``KeyError``.
    """
    non_inplace = NON_INPLACE_OP[f]
    return non_inplace
# _typed_ops.py uses the following wrapped functions as a kind of unary operator
# ``argsort``, ``conj`` and ``conjugate`` call the same-named method on their
# first argument.
argsort = _method_wrapper("argsort")
conj = _method_wrapper("conj")
conjugate = _method_wrapper("conjugate")
# ``round_`` looks for a ``round`` attribute on its first argument and falls
# back to ``duck_array_ops.around`` otherwise.
round_ = _func_slash_method_wrapper(duck_array_ops.around, name="round")
def inject_numpy_same(cls):
    """Attach a ``self.data``-forwarding method to ``cls`` for each name in ``NUMPY_SAME_METHODS``."""
    # these methods don't return arrays of the same shape as the input, so
    # don't try to patch these in for Dataset objects
    for method_name in NUMPY_SAME_METHODS:
        wrapper = _values_method_wrapper(method_name)
        setattr(cls, method_name, wrapper)
class IncludeReduceMethods:
    """Mixin that injects the reduction methods into each subclass.

    When a subclass is created and has a truthy ``_reduce_method`` attribute,
    ``inject_reduce_methods`` is called on it.
    """

    __slots__ = ()

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)

        # Nothing to inject without a reducer factory on the subclass.
        if not getattr(cls, "_reduce_method", None):
            return
        inject_reduce_methods(cls)
class IncludeCumMethods:
    """Mixin that injects the cumulative methods into each subclass.

    When a subclass is created and has a truthy ``_reduce_method`` attribute,
    ``inject_cum_methods`` is called on it.
    """

    __slots__ = ()

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)

        # Nothing to inject without a reducer factory on the subclass.
        reducer_factory = getattr(cls, "_reduce_method", None)
        if reducer_factory:
            inject_cum_methods(cls)
class IncludeNumpySameMethods:
    """Mixin that calls ``inject_numpy_same`` on every subclass when it is created."""

    __slots__ = ()

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)

        # some methods not applicable to Dataset objects
        inject_numpy_same(cls)