import numpy as np
import pandas as pd
import xarray as xr
from functools import cache
import os
import impactlab_tools.assets
# file readers for default weights files in assets directory
@cache
def _get_weights(project='acp', rcp='rcp85'):
da = (
pd.read_csv(
os.path.join(
os.path.dirname(impactlab_tools.assets.__file__),
f'weights_{project}.csv'),
index_col=[0, 1])['weight']
.xs(rcp, level='rcp')
.to_xarray())
return da
# Computation helpers
[docs]
def weighted_quantile_xr(
data,
quantiles,
sample_weight,
dim,
values_sorted=False):
"""
Compute quantiles of a weighted distribution
similar to :py:func:`weighted_quantile` operates on a named dimension of an
:py:class:`xarray.DataArray`.
.. NOTE ::
quantiles should be in [0, 1]!
Parameters
----------
data : DataArray or Dataset
:py:class:`xarray.DataArray` or :py:class`xarray.Dataset` with data
indexed by ``dim``
quantiles : array-like
quantiles of distribution to return
sample_weight : numpy.array
weights array-like of the same length as `array`
values_sorted : bool
if True, then will avoid sorting of initial array
dim : str
Dimension along which to weight the data
Returns
-------
xarray.DataArray
computed quantiles from weighted distribution
"""
if hasattr(data, 'data_vars'):
res = xr.Dataset()
for var in data.data_vars.keys():
if dim in data[var].dims:
res[var] = _weighted_quantile_xr_da(
data[var], quantiles, sample_weight, dim, values_sorted)
else:
res[var] = data[var]
return res
else:
return _weighted_quantile_xr_da(
data, quantiles, sample_weight, dim, values_sorted)
def _weighted_quantile_xr_da(
data,
quantiles,
sample_weight,
dim,
values_sorted=False):
axis = data.get_axis_num(dim)
dims = list(data.dims[:axis]) + ['quantile'] + list(data.dims[axis+1:])
coords = {
coord: data.coords[coord] for coord in data.dims if coord in dims}
coords.update({'quantile': quantiles})
if isinstance(sample_weight, (pd.Series, xr.DataArray)):
weights = sample_weight.loc[data.coords[dim].values].values
else:
weights = sample_weight
data_dist = weighted_quantile(
data.values,
quantiles,
weights,
values_sorted=values_sorted,
axis=axis)
data_dist = xr.DataArray(data_dist, dims=dims, coords=coords)
return data_dist
[docs]
def weighted_quantile(
values,
quantiles,
sample_weight=None,
values_sorted=False,
old_style=False,
axis=None):
"""
Compute quantiles of a weighted distribution
similar to :py:func:`weighted_quantile_1d` but supports weighting along any
(numbered) dimension
.. NOTE ::
quantiles should be in [0, 1]!
Parameters
----------
values : numpy.array
numpy.array with data
quantiles : array-like
quantiles of distribution to return
sample_weight : numpy.array
weights array-like of the same length as `array`
values_sorted : bool
if True, then will avoid sorting of initial array
old_style : bool
if True, will correct output to be consistent with numpy.percentile.
Returns
-------
numpy.array
computed quantiles from weighted distribution
"""
if len(values.shape) > 1 and axis is not None:
return np.apply_along_axis(
weighted_quantile_1d,
axis,
values,
quantiles,
sample_weight,
values_sorted,
old_style)
else:
return weighted_quantile_1d(
values,
quantiles,
sample_weight,
values_sorted,
old_style)
[docs]
def weighted_quantile_1d(
values,
quantiles,
sample_weight=None,
values_sorted=False,
old_style=False):
"""
Very close to numpy.percentile, but supports weights
Thanks to Alleo!
http://stackoverflow.com/questions/21844024/weighted-percentile-using-numpy/29677616#29677616
.. NOTE ::
quantiles should be in [0, 1]!
Parameters
----------
values : numpy.array
numpy.array with data
quantiles : array-like
quantiles of distribution to return
sample_weight : numpy.array
weights array-like of the same length as `array`
values_sorted : bool
if True, then will avoid sorting of initial array
old_style : bool
if True, will correct output to be consistent with numpy.percentile.
Returns
-------
numpy.array
computed quantiles from weighted distribution
"""
values = np.array(values)
quantiles = np.array(quantiles)
if sample_weight is None:
return np.percentile(values, quantiles)
sample_weight = np.array(sample_weight)
if not np.all(quantiles >= 0) and np.all(quantiles <= 1):
raise ValueError('quantiles should be in [0, 1]')
if not values_sorted:
sorter = np.argsort(values)
values = values[sorter]
sample_weight = sample_weight[sorter]
# Find the cumsum, and locate the center of each weight's mass
# e.g. [0.25, 0.25, 0.5, 0] --> [0.125, 0.375, 0.75, 1]
weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight
if old_style:
# To be convenient with np.percentile
# e.g. [0, 0.25, 0.25, 0.5, 0] --> [ 0, 0.2857, 0.7143, 1]
weighted_quantiles -= weighted_quantiles[0]
weighted_quantiles /= weighted_quantiles[-1]
else:
weighted_quantiles /= np.sum(sample_weight)
result = np.interp(quantiles, weighted_quantiles, values)
return result