"""Frequency-severity modeling for actuarial applications.
This module provides classes and functions for modeling compound distributions
commonly used in insurance and actuarial science, where claims are modeled as
the sum of a random number (frequency) of random amounts (severity).
Key components:
- FrequencySeverityModel: Main class for compound distribution modeling
- FreqSevSims: Container for frequency-severity simulation results
- Utility functions for simulation index management
The frequency-severity approach is fundamental in actuarial modeling for:
- Aggregate claims modeling
- Risk assessment and capital modeling
- Insurance pricing and reserving
Example:
>>> from pal.distributions import Poisson, LogNormal
>>> freq_dist = Poisson(5.0) # Expected 5 claims
>>> sev_dist = LogNormal(mean=10000, sigma=0.5) # Claim amounts
>>> model = FrequencySeverityModel(freq_dist, sev_dist)
>>> simulations = model.simulate(n_sims=10000)
"""
from __future__ import annotations
import typing as t
from collections.abc import Callable
import numpy as np
import numpy.typing as npt
from . import distributions
from ._maths import xp
from .config import config
from .couplings import ProteusStochasticVariable as _ProteusStochasticVariable
from .stochastic_scalar import (
StochasticScalar,
)
# Type aliases for frequency-severity modeling
# Types that can be used in mathematical operations with FreqSevSims objects
ProteusCompatibleTypes = t.Union["FreqSevSims", StochasticScalar, int, float, npt.NDArray[t.Any]]
# Function signature for numpy ufunc operations that reduce events to simulations
ReductionOperation = t.Callable[
[
npt.NDArray[np.floating], # result: array to store reduced values
npt.NDArray[np.int64], # indices: simulation indices for each event
npt.NDArray[np.floating], # values: event values to reduce
],
None,
]
# Function signature for transforming arrays element-wise
ArrayTransform = t.Callable[
[npt.NDArray[np.floating]], # input_array: array to transform
npt.NDArray[np.floating], # return: transformed array
]
def _get_sims_of_events(
n_events_by_sim: npt.NDArray[np.int64],
) -> npt.NDArray[np.int64]:
"""Get the simulation index for each event.
Given the number of events in each simulation, returns the simulation
index for each event.
>>> n_events_by_sim = np.array([1, 0, 3])
>>> _get_sims_of_events(n_events_by_sim)
array([0, 2, 2, 2])
Args:
n_events_by_sim (np.ndarray): Array of the number of events in each simulation.
Returns:
np.ndarray: Array of simulation indices for each event.
"""
cumulative_n_events = n_events_by_sim.cumsum()
total_events = cumulative_n_events[-1]
event_no = t.cast(npt.NDArray[np.int64], xp.arange(total_events))
return cumulative_n_events.searchsorted(event_no + 1)
[docs]
class FrequencySeverityModel:
"""Constructs and simulates from Frequency-Severity, or Compound distributions."""
[docs]
def __init__(
self,
freq_dist: distributions.DistributionBase,
sev_dist: distributions.DistributionBase,
):
"""Initialize a frequency-severity model.
Args:
freq_dist: Distribution for frequency component.
sev_dist: Distribution for severity component.
"""
self.freq_dist = freq_dist
self.sev_dist = sev_dist
[docs]
def generate(
self,
n_sims: int | None = None,
rng: np.random.Generator | None = None,
) -> FreqSevSims:
"""Generate simulations from the Frequency-Severity model.
Parameters:
- n_sims (int): Number of simulations to generate. If None, uses the
default value from the config.
- rng (np.random.Generator, optional): Random number generator. Uses
config.rng if None.
Returns:
- FreqSevSims: Object containing the generated simulations.
"""
if n_sims is None:
n_sims = config.n_sims
if rng is None:
rng = config.rng
n_events = self.freq_dist.generate(n_sims, rng)
total_events = n_events.sum()
sev = self.sev_dist.generate(int(total_events), rng)
# Convert n_events to integers since _get_sims_of_events expects integer counts
# but n_events.values comes from distributions which return floating arrays
result = FreqSevSims(_get_sims_of_events(n_events.values.astype(np.int64)), sev.values, n_sims)
result.coupled_variable_group.merge(n_events.coupled_variable_group)
result.coupled_variable_group.merge(sev.coupled_variable_group)
return result
[docs]
class FreqSevSims(_ProteusStochasticVariable):
"""A class for storing and manipulating Frequency-Severity simulations.
FreqSevSims objects provide convenience methods for aggregating and
summarizing the simulations.
>>> sim_index = np.array([0, 0, 1, 1, 1, 2, 2, 2, 2])
>>> values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> n_sims = 3
>>> fs = FreqSevSims(sim_index, values, n_sims)
>>> fs.aggregate()
StochasticScalar([ 3., 12., 30.])
>>> fs.occurrence()
StochasticScalar([2., 5., 9.])
They can be operated on using standard mathematical operations, as well as
as numpy ufuncs and functions.
>>> fs + 1 # doctest: +ELLIPSIS
FreqSevSims(...)
>>> np.maximum(fs, 5) # doctest: +ELLIPSIS
FreqSevSims(...)
>>> np.where(fs > 5, 1, 0) # doctest: +ELLIPSIS
FreqSevSims(...)
FreqSevSims objects can be multiplied, added, subtracted, divided, and
compared with other FreqSevSims objects,
provided that the simulation indices match.
>>> fs1 = FreqSevSims(sim_index, values, n_sims)
>>> fs2 = FreqSevSims(sim_index, values, n_sims)
>>> fs1 + fs2 # doctest: +ELLIPSIS
FreqSevSims(...)
"""
n_sims: int
"""Number of simulations."""
[docs]
def __init__(
self,
sim_index: np.ndarray | list[int],
values: np.ndarray | list[float | int],
n_sims: int,
):
"""Create a new FreqSevSims object out the list of simulation indices.
Creates a FreqSevSims object from simulation indices and corresponding values.
Note, the simulation indices are assumed to be ordered and 0-indexed.
Parameters:
sim_index: simulation indices.
values: the values.
n_sims: Number of simulations.
Raises:
AssertionError: If lengths of values and sim_index don't match.
"""
super().__init__()
self.sim_index = xp.asarray(sim_index)
self.values = xp.asarray(values)
self.n_sims = n_sims # type: ignore
if len(self.sim_index) != len(self.values):
raise ValueError(
f"Length mismatch: sim_index has {len(self.sim_index)} elements "
f"but values has {len(self.values)} elements"
)
def __str__(self):
return "Simulation Index\n" + str(self.sim_index) + "\n Values\n" + str(self.values)
def _reorder_sims(self, new_order: t.Sequence[int]) -> None:
"""Reorder simulations according to the given order.
This method updates the simulation indices to match the new order.
Args:
new_order: A sequence of integers representing the new order of simulations.
"""
reverse_ordering = xp.empty(len(new_order), dtype=int)
reverse_ordering[new_order] = xp.arange(len(new_order), dtype=int)
self.sim_index = reverse_ordering[self.sim_index]
def __getitem__(self, sim_index: int) -> StochasticScalar:
"""Returns the values of the simulation with the given simulation index."""
# get the positions of the given simulation index
if isinstance(sim_index, int): # type: ignore[unecessary-check]
ints = np.where(self.sim_index == sim_index)
return StochasticScalar(self.values[ints])
else:
raise NotImplementedError
def __len__(self) -> int:
"""Return the number of simulations."""
return self.n_sims
def __iter__(self) -> t.Iterator[StochasticScalar]:
"""Iterate over the simulations."""
for i in range(self.n_sims):
yield self[i]
def _reduce_over_events(self, operation: ReductionOperation) -> StochasticScalar:
"""Apply a reduction operation over events for each simulation.
Groups events by simulation index and applies the specified operation
to combine values within each simulation.
Args:
operation: Numpy ufunc operation to apply (e.g., np.add.at, np.maximum.at)
Returns:
A StochasticScalar containing the reduced values for each simulation
Raises:
ValueError: If n_sims is not set
"""
_result = xp.zeros(self.n_sims)
operation(_result, self.sim_index, self.values)
result = StochasticScalar(_result)
result.coupled_variable_group.merge(self.coupled_variable_group)
return result
[docs]
def aggregate(self) -> StochasticScalar:
"""Calculates the aggregate loss for each simulation.
Sums all individual event losses within each simulation to get the total
loss per simulation. This converts event-level FreqSevSims data to
simulation-level StochasticScalar data suitable for statistical analysis.
Example:
>>> sim_index = np.array([0, 0, 1, 1, 1, 2, 2, 2, 2])
>>> values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> n_sims = 3
>>> fs = FreqSevSims(sim_index, values, n_sims)
>>> aggregate_losses: StochasticScalar = fs.aggregate()
>>> aggregate_losses
StochasticScalar([3., 12., 30.])
>>> # Now you can apply statistical methods
>>> aggregate_losses.mean()
15.0
Returns:
StochasticScalar: Array containing the aggregate loss for each simulation.
Use this for statistical analysis (mean, std, percentiles, etc.).
"""
return self._reduce_over_events(np.add.at)
[docs]
def occurrence(self) -> StochasticScalar:
"""Calculates the maximum occurrence loss for each simulation.
Finds the largest individual event loss within each simulation. This
converts event-level FreqSevSims data to simulation-level StochasticScalar
data suitable for statistical analysis.
Example:
>>> sim_index = np.array([0, 0, 1, 1, 1, 2, 2, 2, 2])
>>> values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> n_sims = 3
>>> fs = FreqSevSims(sim_index, values, n_sims)
>>> max_losses: StochasticScalar = fs.occurrence()
>>> max_losses
StochasticScalar([2., 5., 9.])
>>> # Now you can apply statistical methods
>>> max_losses.mean()
5.33
Returns:
StochasticScalar: Array containing the maximum occurrence loss for each
simulation. Use this for statistical analysis (mean, std,
percentiles, etc.).
"""
return self._reduce_over_events(np.maximum.at)
[docs]
def count(self) -> StochasticScalar:
"""Counts the number of losses (events) in each simulation.
Returns the frequency count of how many individual losses occurred
within each simulation. This is useful for analyzing frequency
distributions and understanding claim counts.
Example:
>>> sim_index = np.array([0, 0, 1, 1, 1, 2, 2, 2, 2])
>>> values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> n_sims = 3
>>> fs = FreqSevSims(sim_index, values, n_sims)
>>> loss_counts: StochasticScalar = fs.count()
>>> loss_counts
StochasticScalar([2., 3., 4.])
>>> # Now you can apply statistical methods
>>> loss_counts.mean()
3.0
>>> loss_counts.quantile(0.5)
3.0
Returns:
StochasticScalar: Array containing the number of losses for each
simulation. Use this for analyzing frequency distributions.
"""
counts = xp.bincount(self.sim_index, minlength=self.n_sims).astype(float)
result = StochasticScalar(counts)
result.coupled_variable_group.merge(self.coupled_variable_group)
return result
[docs]
def deep_copy(self) -> FreqSevSims:
"""Creates a deep copy of the FreqSevSims object."""
return FreqSevSims(self.sim_index, self.values.copy(), self.n_sims)
[docs]
def copy(self) -> FreqSevSims:
"""Creates a copy of the FreqSevSims object."""
result = FreqSevSims(self.sim_index, self.values.copy(), self.n_sims)
result.coupled_variable_group.merge(self.coupled_variable_group)
return result
[docs]
def apply(self, func: ArrayTransform) -> FreqSevSims:
"""Applies a function to the values of the FreqSevSims object."""
result = FreqSevSims(self.sim_index, func(self.values), self.n_sims)
result.coupled_variable_group.merge(self.coupled_variable_group)
return result
def _extract_array_for_ufunc(self, x: t.Any) -> npt.NDArray[np.floating]:
"""Extract array values from various input types for ufunc operations.
Args:
x: Input value that could be FreqSevSims, StochasticScalar, ndarray,
or scalar
Returns:
Array values aligned with simulation indices
"""
if isinstance(x, FreqSevSims):
return x.values
elif isinstance(x, StochasticScalar):
return x.values[self.sim_index]
elif isinstance(x, np.ndarray):
# Type ignore: Pyright can't infer the exact dtype of indexed arrays
return x[self.sim_index] # type: ignore[misc]
else:
# Scalar value - return as-is
return x
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs: t.Any, **kwargs: t.Any) -> FreqSevSims:
_inputs = tuple(self._extract_array_for_ufunc(x) for x in inputs)
out = kwargs.get("out", ())
if out:
kwargs["out"] = tuple(x.values for x in out)
result = getattr(ufunc, method)(*_inputs, **kwargs)
result = FreqSevSims(self.sim_index, result, self.n_sims)
for input in inputs:
if isinstance(input, _ProteusStochasticVariable):
input.coupled_variable_group.merge(self.coupled_variable_group)
result.coupled_variable_group.merge(self.coupled_variable_group)
return result
def __array_function__(
self, func: Callable[..., t.Any], _: t.Any, args: t.Any, kwargs: t.Any
) -> np.number[t.Any] | FreqSevSims:
"""Handle numpy array functions for FreqSevSims objects.
Args:
func: The numpy function being called
types: Types involved in the operation
args: Arguments passed to the function
kwargs: Keyword arguments passed to the function
Returns:
Either a scalar result or new FreqSevSims object
Raises:
NotImplementedError: If the function is not supported
"""
if func not in (
np.where,
np.sum,
np.array_equal,
np.minimum,
np.maximum,
np.mean,
np.round,
):
raise NotImplementedError(f"Function {func.__name__} not supported")
# Special handling for mean - aggregate first, then take mean
if func is np.mean and len(args) == 1 and args[0] is self:
return func(self.aggregate(), **kwargs)
# Extract values from FreqSevSims objects, leave others as-is
processed_args = tuple(x.values if isinstance(x, FreqSevSims) else x for x in args)
result = func(*processed_args, **kwargs)
# If result is a scalar, return it directly
# Type ignore: Pyright can't infer the exact numpy scalar type
if isinstance(result, (np.number, np.bool_, bool)) or np.isscalar(result):
return result # type: ignore[misc]
# Otherwise create a new FreqSevSims object with the result
new_freq_sev = FreqSevSims(self.sim_index, result, self.n_sims)
new_freq_sev.coupled_variable_group.merge(self.coupled_variable_group)
return new_freq_sev
def __repr__(self):
return f"{type(self).__name__}({self.values!r})"
def _is_compatible(self, other: ProteusCompatibleTypes) -> bool:
"""Check if two FreqSevSims objects are compatible for mathematical operations.
Args:
other: Another FreqSevSims object or compatible type
Returns:
True if compatible, False otherwise
"""
return isinstance(other, FreqSevSims) and self.sim_index is other.sim_index
[docs]
def upsample(self, n_sims: int) -> FreqSevSims:
"""Upsamples the FreqSevSims object to the given number of simulations.
Args:
n_sims: Target number of simulations
Returns:
New FreqSevSims object with upsampled data
Raises:
ValueError: If self.n_sims is None
"""
if n_sims == self.n_sims:
return self.copy()
sim_index = np.repeat(self.sim_index, n_sims // self.n_sims)
values = np.repeat(self.values, n_sims // self.n_sims)
if n_sims % self.n_sims > 0:
sim_index = np.concatenate((sim_index, self.sim_index[self.sim_index < n_sims % self.n_sims]))
values = np.concatenate((values, self.values[self.sim_index < n_sims % self.n_sims]))
sim_index = sim_index + xp.arange(len(sim_index)) % n_sims
return FreqSevSims(sim_index, values, n_sims)