"""EM algorithm for variance estimation in linear-Gaussian SSMs."""

from __future__ import annotations

from dataclasses import dataclass

import jax.numpy as jnp
from jax import Array

from dynaris.core.results import SmootherResult
from dynaris.core.state_space import StateSpaceModel
from dynaris.filters.kalman import kalman_filter
from dynaris.smoothers.rts import rts_smooth


@dataclass(frozen=True)
class EMResult:
    """Result of EM estimation.

    Attributes:
        model: Fitted StateSpaceModel at convergence.
        log_likelihood: Final log-likelihood value.
        n_iterations: Number of EM iterations performed.
        converged: Whether the algorithm converged.
        log_likelihood_history: Log-likelihood at each iteration.
    """

    model: StateSpaceModel
    log_likelihood: float
    n_iterations: int
    converged: bool
    log_likelihood_history: list[float]


def _e_step(
    model: StateSpaceModel, observations: Array
) -> tuple[SmootherResult, float]:
    """E-step: run the Kalman filter, then the RTS smoother."""
    fr = kalman_filter(model, observations)
    sr = rts_smooth(model, fr)
    return sr, float(fr.log_likelihood)


def _m_step(
    sr: SmootherResult, model: StateSpaceModel
) -> StateSpaceModel:
    """M-step: update Q and R from smoothed sufficient statistics.

    For a general linear-Gaussian SSM:

        Q_new = (1/T) * sum_t [P_{t|T} + x_{t|T} x_{t|T}^T
                - (P_{t,t-1|T} + x_{t|T} x_{t-1|T}^T) F^T
                - F (P_{t,t-1|T} + x_{t|T} x_{t-1|T}^T)^T
                + F (P_{t-1|T} + x_{t-1|T} x_{t-1|T}^T) F^T]

        R_new = (1/T) * sum_t [(y_t - H x_{t|T})(y_t - H x_{t|T})^T
                + H P_{t|T} H^T]

    We use a simplified Q update that drops the lag-one cross-covariance
    terms P_{t,t-1|T} (see the comments below), a common approximation
    for DLM variance estimation.
    """
    obs = sr.observations  # (T, m)
    x_smooth = sr.smoothed_states  # (T, n)
    p_smooth = sr.smoothed_covariances  # (T, n, n)
    n_time = obs.shape[0]

    # --- Estimate R (observation noise covariance) ---
    # residual_t = y_t - H @ x_{t|T}
    residuals = obs - (x_smooth @ model.H.T)  # (T, m)
    # R = (1/T) * sum_t [r_t r_t^T + H P_{t|T} H^T]
    outer_sum = jnp.einsum("ti,tj->ij", residuals, residuals)  # (m, m)
    hp_ht_sum = jnp.sum(model.H @ p_smooth @ model.H.T, axis=0)  # sum over T -> (m, m)
    new_r = (outer_sum + hp_ht_sum) / n_time

    # --- Estimate Q (state noise covariance) ---
    # The exact update (all quantities smoothed) is:
    #   Q = (1/(T-1)) sum_t [(x_t - F x_{t-1})(x_t - F x_{t-1})^T + P_{t|T}
    #       - P_{t,t-1|T} F^T - F P_{t,t-1|T}^T + F P_{t-1|T} F^T]
    # We drop the lag-one cross-covariance terms P_{t,t-1|T} rather than
    # reconstructing them from the smoother gain; this typically biases Q
    # upward but is a standard simplification in DLM practice.
    x_pred = x_smooth[:-1] @ model.F.T  # F @ x_{t-1|T}, shape (T-1, n)
    state_resids = x_smooth[1:] - x_pred  # (T-1, n)
    outer_q = jnp.einsum("ti,tj->ij", state_resids, state_resids)  # (n, n)
    # Smoothed covariance terms: sum_t P_{t|T} and sum_t F P_{t-1|T} F^T.
    p_curr = jnp.sum(p_smooth[1:], axis=0)
    fp_ft = jnp.sum(
        model.F @ p_smooth[:-1] @ model.F.T, axis=0
    )
    new_q = (outer_q + p_curr + fp_ft) / (n_time - 1)
    # Symmetrize to guard against accumulated floating-point asymmetry.
    new_q = (new_q + new_q.T) / 2.0
    new_r = (new_r + new_r.T) / 2.0

    return StateSpaceModel(
        transition_matrix=model.transition_matrix,
        observation_matrix=model.observation_matrix,
        state_noise_cov=new_q,
        obs_noise_cov=new_r,
        input_matrix=model.input_matrix,
    )


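# For reference, the exact Q update including the lag-one terms is sketched
# below. It assumes a hypothetical `smoothed_cross_covariances` field on
# SmootherResult holding P_{t,t-1|T} with shape (T-1, n, n) -- dynaris may
# not expose this, so treat it as an illustration rather than working API.
def _m_step_q_exact(sr: SmootherResult, model: StateSpaceModel) -> Array:
    """Exact EM update for Q using lag-one smoothed cross-covariances."""
    x = sr.smoothed_states  # (T, n)
    p = sr.smoothed_covariances  # (T, n, n)
    p_cross = sr.smoothed_cross_covariances  # hypothetical field, (T-1, n, n)
    n_time = x.shape[0]
    f = model.F
    # Smoothed second moments, summed over t = 1..T-1:
    #   A = sum E[x_{t-1} x_{t-1}^T], B = sum E[x_t x_{t-1}^T],
    #   C = sum E[x_t x_t^T]
    a = jnp.sum(p[:-1], axis=0) + jnp.einsum("ti,tj->ij", x[:-1], x[:-1])
    b = jnp.sum(p_cross, axis=0) + jnp.einsum("ti,tj->ij", x[1:], x[:-1])
    c = jnp.sum(p[1:], axis=0) + jnp.einsum("ti,tj->ij", x[1:], x[1:])
    q = (c - b @ f.T - f @ b.T + f @ a @ f.T) / (n_time - 1)
    return (q + q.T) / 2.0  # enforce symmetry

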
def fit_em(
    observations: Array,
    initial_model: StateSpaceModel,
    max_iter: int = 100,
    tol: float = 1e-6,
) -> EMResult:
    """Fit a state-space model via the EM algorithm.

    Iteratively updates the Q (state noise) and R (observation noise)
    covariance matrices while keeping F, H, and B fixed.

    Args:
        observations: Observation sequence, shape (T, obs_dim).
        initial_model: Starting model with initial variance guesses.
        max_iter: Maximum number of EM iterations.
        tol: Convergence tolerance on the log-likelihood change.

    Returns:
        EMResult with the fitted model and convergence details. If
        max_iter is reached without converging, the returned model has
        one more M-step applied than the reported log-likelihood reflects.
    """
    observations = jnp.asarray(observations)
    model = initial_model
    ll_history: list[float] = []
    converged = False

    for i in range(max_iter):
        # The E-step scores the *current* model, so ll_history[i] is the
        # log-likelihood before the i-th M-step update.
        sr, ll = _e_step(model, observations)
        ll_history.append(ll)

        # EM increases the log-likelihood monotonically, so a small change
        # between successive iterations signals convergence.
        if i > 0 and abs(ll - ll_history[-2]) < tol:
            converged = True
            break

        model = _m_step(sr, model)

    return EMResult(
        model=model,
        log_likelihood=ll_history[-1] if ll_history else float("-inf"),
        n_iterations=len(ll_history),
        converged=converged,
        log_likelihood_history=ll_history,
    )
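

# Minimal usage sketch: fit the noise variances of a local-level model
# (random-walk state observed with noise). The constructor fields follow
# the call in `_m_step` above; the observations are placeholder random
# data, and `input_matrix=None` assumes the model accepts a missing input.
if __name__ == "__main__":
    import jax

    key = jax.random.PRNGKey(0)
    y = jax.random.normal(key, (200, 1))  # placeholder observations, (T, 1)

    init = StateSpaceModel(
        transition_matrix=jnp.eye(1),  # F: random-walk state
        observation_matrix=jnp.eye(1),  # H: observe the state directly
        state_noise_cov=jnp.eye(1) * 0.1,  # initial guess for Q
        obs_noise_cov=jnp.eye(1) * 1.0,  # initial guess for R
        input_matrix=None,  # B: no exogenous input (assumed allowed)
    )

    result = fit_em(y, init, max_iter=50)
    print(f"converged={result.converged} in {result.n_iterations} iterations")
    print(f"final log-likelihood: {result.log_likelihood:.4f}")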