Source code for pal.types

"""Type definitions and protocols for the PAL library.

Defines common type aliases, protocols, and configuration classes used
throughout the library for type safety and consistency.
"""

# standard library
from __future__ import annotations

import dataclasses
import itertools
import typing as t

# third party
import numpy.typing as npt

# project
from ._compat import Self
from ._maths import xp as np

# Names exported by ``from pal.types import *``.
__all__ = [
    "ArithmeticProtocol",
    "Config",
    "DistributionLike",
    "DistributionParameter",
    "Numeric",
    "NumericLike",
    "NumericProtocol",
    "ScipyNumeric",
    "VectorLike",
]

# Plain scalar numbers: Python floats and ints plus any ``np.number`` instance.
Numeric = t.Union[float, int, np.number]

# Type alias for scipy special functions and numpy random generators.
# These functions expect more restrictive types than our general Numeric type.
# They don't accept complex numbers, NumericProtocol objects, or general
# np.number types.
ScipyNumeric = t.Union[float, int, np.floating, np.integer]

# Imported for static type checkers only; the name does not exist at runtime,
# which is why the alias below refers to it through a string forward reference.
if t.TYPE_CHECKING:
    from .stochastic_scalar import StochasticScalar

# Type alias for distribution parameters - clean type for documentation.
DistributionParameter = t.Union[int, float, "StochasticScalar"]

# Type variables shared by the generic protocols in this module.
# T_value is invariant (the TypeVar default); T_co is explicitly covariant.
T_value = t.TypeVar("T_value")
T_co = t.TypeVar("T_co", covariant=True)


@dataclasses.dataclass
class Config:
    """Configuration class for PAL.

    Attributes:
        n_sims: Number of simulations (default 10000).
        seed: Seed for the random number generator (default 123456789).
        rng: Random number generator. When left at its default, instances
            created with the default ``seed`` share one generator that is
            built a single time at class-definition time (also reachable as
            ``Config.rng``). Instances created with a different ``seed`` and
            no explicit ``rng`` get their own generator seeded with that
            value.
    """

    n_sims: int = 10000
    seed: int = 123456789
    # Evaluated once while the class body runs, using the default ``seed``
    # defined on the line above.
    rng: np.random.Generator = np.random.default_rng(seed)
    # Not annotated, so the dataclass machinery leaves it as a plain class
    # attribute (one counter starting at 1, shared by all instances) rather
    # than turning it into an ``__init__`` field.
    _uid_counter = itertools.count(1)

    def __post_init__(self) -> None:
        """Make a custom ``seed`` take effect when no ``rng`` was supplied.

        Without this hook ``Config(seed=42).rng`` would still be the shared
        generator built from the default seed, silently ignoring the
        requested seed. An explicitly passed ``rng`` is always kept as is.
        """
        defaults = {f.name: f.default for f in dataclasses.fields(self)}
        uses_default_rng = self.rng is defaults["rng"]
        if uses_default_rng and self.seed != defaults["seed"]:
            self.rng = np.random.default_rng(self.seed)
@t.runtime_checkable
class ArithmeticProtocol(t.Protocol):
    """Structural base type for anything that supports arithmetic.

    Collects the arithmetic operators that scalar-like and vector-like
    protocols have in common. Every binary operator is declared together
    with its reflected counterpart and is annotated as returning ``Self``;
    unary negation is annotated as returning ``t.Any``.
    """

    # --- addition -------------------------------------------------------
    def __add__(self, other: t.Any) -> Self:
        """Return ``self + other``."""

    def __radd__(self, other: t.Any) -> Self:
        """Return ``other + self``."""

    # --- subtraction ----------------------------------------------------
    def __sub__(self, other: t.Any) -> Self:
        """Return ``self - other``."""

    def __rsub__(self, other: t.Any) -> Self:
        """Return ``other - self``."""

    # --- multiplication -------------------------------------------------
    def __mul__(self, other: t.Any) -> Self:
        """Return ``self * other``."""

    def __rmul__(self, other: t.Any) -> Self:
        """Return ``other * self``."""

    # --- true division --------------------------------------------------
    def __truediv__(self, other: t.Any) -> Self:
        """Return ``self / other``."""

    def __rtruediv__(self, other: t.Any) -> Self:
        """Return ``other / self``."""

    # --- exponentiation -------------------------------------------------
    def __pow__(self, other: t.Any) -> Self:
        """Return ``self ** other``."""

    def __rpow__(self, other: t.Any) -> Self:
        """Return ``other ** self``."""

    # --- unary ----------------------------------------------------------
    def __neg__(self) -> t.Any:
        """Return ``-self``."""
@t.runtime_checkable
class NumericProtocol(ArithmeticProtocol, t.Protocol):
    """Structural type for scalar-like numeric objects.

    Extends ``ArithmeticProtocol`` with ordering and equality operators.
    All of them are annotated as returning a plain ``bool`` (scalar
    semantics).
    """

    # Equality (scalar semantics - a single bool)
    def __eq__(self, other: t.Any) -> bool:
        """Return ``self == other``."""

    def __ne__(self, other: t.Any) -> bool:
        """Return ``self != other``."""

    # Ordering (scalar semantics - a single bool)
    def __lt__(self, other: t.Any) -> bool:
        """Return ``self < other``."""

    def __le__(self, other: t.Any) -> bool:
        """Return ``self <= other``."""

    def __gt__(self, other: t.Any) -> bool:
        """Return ``self > other``."""

    def __ge__(self, other: t.Any) -> bool:
        """Return ``self >= other``."""
@t.runtime_checkable
class SupportsArray(t.Protocol):
    """Structural type for objects that take part in numpy's protocols.

    An implementer provides all three numpy hook methods, which together
    cover conversion to an array, function dispatch and ufunc dispatch.

    Array protocol methods in PAL
    -----------------------------
    ``__array__``
        - Purpose: turn the object into a numpy array for basic array
          operations.
        - When called: ``np.asarray(obj)`` and similar conversion functions.
        - Return type: always a numpy array (``npt.NDArray``).
        - Usage: simple conversion; enables basic numpy compatibility.

    ``__array_function__``
        - Purpose: numpy function dispatch - intercepts numpy function calls.
        - When called: numpy functions such as ``np.sum()`` or ``np.mean()``.
        - Return type: anything (scalars, arrays, custom objects).
        - Usage: custom behaviour for numpy functions.

    ``__array_ufunc__``
        - Purpose: numpy universal functions (element-wise operations).
        - When called: ufuncs such as ``np.exp()`` / ``np.add()``, or the
          ``+`` and ``-`` operators.
        - Return type: typically the same type as the input, so the custom
          type is preserved (the annotation itself is ``t.Any``).
        - Usage: keep custom types alive through element-wise operations.

    In short:

    - ``__array__`` converts to a numpy array.
    - ``__array_function__`` handles array functions (``np.sum``, ...).
    - ``__array_ufunc__`` handles element-wise ufuncs (``np.exp``, ``+``, ...).
    """

    def __array__(self, dtype: t.Any = None) -> npt.NDArray[t.Any]:
        """Return the object as a numpy array, for use by numpy functions."""

    def __array_function__(
        self,
        func: t.Any,
        _: tuple[type, ...],
        args: tuple[t.Any, ...],
        kwargs: dict[str, t.Any],
    ) -> t.Any:
        """Dispatch a numpy function call so object semantics are preserved."""

    def __array_ufunc__(
        self,
        ufunc: t.Any,
        method: t.Literal[
            "__call__", "reduce", "reduceat", "accumulate", "outer", "at"
        ],
        *inputs: t.Any,
        **kwargs: t.Any,
    ) -> t.Any:
        """Dispatch a numpy ufunc so the type survives element-wise work."""


@t.runtime_checkable
class VectorOperations(ArithmeticProtocol, t.Protocol):
    """Structural type for objects with vector-style operator semantics.

    Vector-style means:

    - comparison operators are element-wise and return ``Self``;
    - ``len()`` is supported and gives the vector size;
    - numpy integration is *not* required.

    Serves as the common base of ``VectorLike`` (which adds numpy support
    through ``SupportsArray``) and ``ProteusLike`` (a container protocol
    without numpy support).
    """

    # Ordering (vector semantics - element-wise, returns Self)
    def __lt__(self, other: t.Any) -> Self:
        """Return the element-wise ``self < other``."""

    def __le__(self, other: t.Any) -> Self:
        """Return the element-wise ``self <= other``."""

    def __gt__(self, other: t.Any) -> Self:
        """Return the element-wise ``self > other``."""

    def __ge__(self, other: t.Any) -> Self:
        """Return the element-wise ``self >= other``."""

    # Equality (vector semantics - element-wise, returns Self).
    # ``object.__eq__`` / ``object.__ne__`` are typed as returning bool, so
    # the override has to be silenced for the type checker.
    def __eq__(self, other: t.Any) -> Self:  # type: ignore[override]
        """Return the element-wise ``self == other``."""

    def __ne__(self, other: t.Any) -> Self:  # type: ignore[override]
        """Return the element-wise ``self != other``."""

    # Size
    def __len__(self) -> int:
        """Return the number of elements."""
@t.runtime_checkable
class VectorLike(VectorOperations, SupportsArray, t.Protocol[T_co]):
    """Structural type for vector-like objects convertible to numpy arrays.

    Combines ``VectorOperations`` (vector-style arithmetic and element-wise
    comparisons returning ``Self``) with ``SupportsArray`` (numpy
    conversion and dispatch hooks). Use it for true vector-like types that
    can be turned into numpy arrays.

    Why VectorLike rather than numpy's ArrayLike?
    ---------------------------------------------
    The two answer different questions.

    **numpy ArrayLike** - "What can become an array?"

    - Describes which inputs numpy functions accept and convert.
    - Is about data-conversion compatibility.
    - Is just a union of types numpy knows how to convert (lists, objects
      with ``__array__``, ...).
    - Example: ``np.sum([1, 2, 3])`` works because a list is ArrayLike.

    **VectorLike** - "How do math operations behave?"

    - Describes the behavioural contract of mathematical operations.
    - Is about operation semantics and type preservation.
    - Requires operations to return the same type (``Self``), not merely
      some array.
    - Example: ``StochasticScalar([1, 2, 3]) + 5`` gives a
      ``StochasticScalar``, not an ``ndarray``.

    Key differences
    ---------------
    1. **Type preservation**: operations keep the original type
       (``StochasticScalar + StochasticScalar = StochasticScalar``), whereas
       going through ArrayLike loses it (the result is an ``ndarray``).
    2. **Comparison semantics**: comparisons are element-wise and return
       ``Self`` rather than a scalar ``bool``. Example:
       ``StochasticScalar([1, 2, 3]) > 2`` gives
       ``StochasticScalar([False, False, True])``.
    3. **Operation contracts**: VectorLike states how mathematical
       operations behave for custom types; ArrayLike only cares about
       convertibility.

    Why both are needed
    -------------------
    Classes implementing VectorLike should also be ArrayLike-compatible by
    implementing ``__array__()``. That yields:

    - type preservation through operations (VectorLike benefit);
    - numpy function compatibility (ArrayLike benefit);
    - a clear semantic split between scalar and vector operations.

    Example implementation
    ----------------------
    ::

        class StochasticScalar:
            # VectorLike: defines operation behaviour
            def __add__(self, other) -> Self: ...  # Returns StochasticScalar
            def __gt__(self, other) -> Self: ...   # Returns StochasticScalar

            # ArrayLike compatibility: allows numpy function usage
            def __array__(self) -> np.ndarray: ...  # Enables np.sum(obj)

    Numpy compatibility
    -------------------
    The numpy-related members are:

    - ``__array__()``: conversion to a numpy array for numpy functions.
    - ``__len__()``: required by many numpy operations.
    - ``__array_ufunc__()`` (inherited from ``SupportsArray``): handling
      of numpy universal functions while preserving the type.

    Together they let VectorLike objects work with numpy while keeping
    their custom type semantics.
    """

    # No members are declared here; everything is inherited:
    # - VectorOperations: comparison ops, __len__
    # - SupportsArray: __array__, __array_function__, __array_ufunc__
    # - ArithmeticProtocol (via VectorOperations): arithmetic ops


# Union of the basic numeric types and objects implementing the scalar
# protocol (whose comparison operations return bool).
NumericLike = t.Union[Numeric, NumericProtocol]

# FIXME: VectorLike should be generic VectorLike[T] to enable proper typing
# of math functions like sum(VectorLike[T]) -> T. This would allow:
# - sum(StochasticScalar) -> float
# - sum(ProteusVariable) -> StochasticScalar (when containing StochasticScalar)
# Currently blocked by need to refactor all VectorLike usage sites.


@t.runtime_checkable
class SequenceLike(t.Protocol[T_value]):
    """Structural type for sequence-like objects holding ``T_value`` items."""

    def __len__(self) -> int:
        """Return the number of items."""

    def __getitem__(self, index: int) -> T_value:
        """Return the item at position ``index``."""

    def __iter__(self) -> t.Iterator[T_value]:
        """Return an iterator over the items."""

    def __contains__(self, value: object) -> bool:
        """Return whether ``value`` is one of the items."""

    def __reversed__(self) -> t.Iterator[T_value]:
        """Return an iterator over the items in reverse order."""

    def count(self, value: T_value) -> int:
        """Return how many times ``value`` occurs."""

    def index(
        self, value: T_value, start: int = 0, stop: int | None = None
    ) -> int:
        """Return the position of ``value`` within ``[start, stop)``."""


@t.runtime_checkable
class ProteusLike(VectorOperations, SequenceLike[T_value], t.Protocol):
    """Generic protocol for multi-dimensional stochastic variable containers.

    The protocol is generic over the type of value it contains, which is
    meant to keep element types predictable:

    - ``ProteusLike[NumericLike]`` holds scalars; operations return scalars.
    - ``ProteusLike[VectorLike]`` holds vectors; operations return vectors.
    - ``ProteusLike[StochasticScalar]`` holds stochastic scalars and
      preserves that type.

    NOTE(review): ``T_value`` is declared as an ordinary (invariant)
    ``TypeVar`` in this module, so type checkers will not treat
    ``ProteusLike`` as covariant in its element type.

    Key characteristics
    -------------------
    1. **Type preservation**: the type parameter ``T_value`` determines
       what element-returning operations yield. Store scalars, get scalars
       back; store vectors, get vectors back.
    2. **Container nature**: objects hold multiple stochastic variables or
       scalars, indexed by dimension names.
    3. **Mathematical operations**: vectorised arithmetic preserves the
       container type (``ProteusLike[T] + ProteusLike[T] = ProteusLike[T]``).
    4. **Iteration**: iterating yields values of type ``T_value``.
    5. **Nesting support**: ``ProteusLike[ProteusLike[T]]`` allows
       hierarchical structures for multi-dimensional risk modelling.

    Usage examples
    --------------
    (``mean`` below is illustrative; it is not a member of this protocol.)

    ```python
    # Type hints follow the element type
    def analyze_scalars(var: ProteusLike[NumericLike]) -> NumericLike:
        return var.mean()  # Returns NumericLike

    def analyze_vectors(var: ProteusLike[VectorLike]) -> VectorLike:
        return var.mean()  # Returns VectorLike

    def combine_risks(
        var1: ProteusLike[T_value], var2: ProteusLike[T_value]
    ) -> ProteusLike[T_value]:
        return var1 + var2  # Type preserved

    # Nested structures
    def process_nested(
        var: ProteusLike[ProteusLike[StochasticScalar]]
    ) -> ProteusLike[StochasticScalar]:
        # Work with hierarchical risk structures
        return var["region"]["peril"]
    ```

    Implementation note
    -------------------
    Classes should NOT inherit from this protocol. They should simply
    provide the required methods and attributes; the protocol is used
    purely for static type checking:

    ```python
    class ProteusVariable:  # Note: NO inheritance from ProteusLike
        def __init__(self, dim_name: str, values: dict[str, T]):
            self.dim_name = dim_name
            self.values = values
            self.n_sims = self._calculate_n_sims()
    ```
    """

    # Simulation count of the container; may be None.
    n_sims: int | None
    # Contained values keyed by string.
    values: t.Mapping[str, T_value]

    def __getitem__(self, key: int | str) -> T_value:
        """Look up a contained value by position or by name.

        Widens ``SequenceLike.__getitem__``, which only accepts ``int``:

        - integer key: access by position in iteration order;
        - string key: access by dimension name (e.g. ``obj["region"]``).

        The override is needed because these containers are mapping-like
        and require string keys for dimension names.
        """

    def upsample(self, n_sims: int) -> ProteusLike[t.Any]:
        """Upsample the variable to the given number of simulations.

        Args:
            n_sims: The number of simulations to upsample to.

        Returns:
            A new instance of self with the upsampled values.
        """


T_distribution = t.TypeVar("T_distribution", bound="ScipyNumeric | npt.NDArray[np.floating]")
class DistributionLike(t.Protocol[T_distribution]):
    """Generic protocol for distribution-like objects.

    The protocol is parameterised by the type of value the distribution
    works on, so that inputs and outputs of its mathematical operations
    stay consistent.

    Type parameter:
        T_distribution: The value type, bounded to ``ScipyNumeric``
            (``float | int | np.floating | np.integer``) or
            ``npt.NDArray[np.floating]`` for vectorised use.

    Key properties
    --------------
    1. **Input-output consistency**: ``cdf()`` and ``invcdf()`` return the
       type they are given.

       - ``DistributionLike[float].cdf(x: float) -> float``
       - ``DistributionLike[NDArray].cdf(x: NDArray) -> NDArray``
    2. **Scalar and vector support**: one distribution can serve both
       scalar inputs (individual probability calculations) and array
       inputs (vectorised calculations over many values).
    3. **Type safety**: the generic parameter keeps incompatible value
       types from being mixed.

    Usage examples
    --------------
    ```python
    # Scalar distribution operations
    def eval_at_point(dist: DistributionLike[float], value: float) -> float:
        return dist.cdf(value)  # Returns float

    # Vectorized distribution operations
    def eval_array(dist: DistributionLike[NDArray], values: NDArray) -> NDArray:
        return dist.cdf(values)  # Returns NDArray

    # Drawing samples
    def sample_distribution(dist: DistributionLike[Any]):
        return dist.generate(n_sims=1000)
    ```

    NOTE(review): earlier prose for this class said ``generate`` "always
    returns VectorLike", but the method is annotated as returning
    ``ProteusLike[T_distribution]`` - confirm which one is intended.
    """

    def cdf(self, x: T_distribution) -> T_distribution:
        """Compute the cumulative distribution function at ``x``."""

    def invcdf(self, u: T_distribution) -> T_distribution:
        """Compute the inverse cumulative distribution function at ``u``."""

    def generate(
        self,
        n_sims: int | None = None,
        rng: np.random.Generator | None = None,
    ) -> ProteusLike[T_distribution]:
        """Generate random samples from the distribution.

        Args:
            n_sims: Number of simulations. Uses ``config.n_sims`` if None.
            rng: Random number generator (optional).

        Returns:
            Generated samples.
        """