mavats.alphaPCA

  1import math
  2import warnings
  3from typing import Tuple, Union
  4
  5import numpy as np
  6from numpy import linalg as LA
  7from scipy.linalg import eigh
  8
  9from mavats.factormodel import _estimate_k
 10
 11
 12def estimate_alpha_PCA(
 13    Y: np.ndarray,
 14    alpha: Union[float, int],
 15    k: Union[int, None] = None,
 16    r: Union[int, None] = None,
 17):
 18    r"""
 19    Estimates the $\alpha$-PCA model in Chen and Fan 2021 (https://doi.org/10.1080/01621459.2021.1970569)
 20    where $Y_t = R F_t C^T + E_t$
 21
 22    Parameters
 23    ----------
 24    Y : (T, p, q) ndarray
 25        The observed data.
 26    alpha : float
 27        The hyperparameter $\alpha \geq -1$ that aims to balance information between first and second moments.
 28    k : int, optional
 29        Rank of the front loading matrix. If None, the rank is estimated.
 30    r : int, optional
 31        Rank of the back loading matrix. If None, the rank is estimated.
 32
 33    Returns
 34    -------
 35    F : (T, k, r) ndarray
 36        The estimated factor matrix.
 37    S : (T, p, q) ndarray
 38        The estimated signal $S_t = R F_t C^T$.
 39    R : (p, k) ndarray
 40        The estimated front loading matrix.
 41    C : (q, r) ndarray
 42        The estimated back loading matrix.
 43
 44    """
 45    if not (alpha >= -1):
 46        raise ValueError("alpha must be greater than or equal to -1")
 47    T, p, q = Y.shape
 48    pqT = p * q * T
 49    pq = p * q
 50    Y_tilde = _compute_Y_tilde(Y, alpha)
 51    M_R = np.einsum("tij,tkj->ik", Y_tilde, Y_tilde) / pqT
 52    M_C = np.einsum("tji,tjk->ik", Y_tilde, Y_tilde) / pqT
 53    R = _compute_loading(M_R, k)
 54    R *= math.sqrt(p)
 55    C = _compute_loading(M_C, r)
 56    C *= math.sqrt(q)
 57    F = R.T @ Y @ C / pq
 58    S = R @ F @ C.T / pq
 59    return F, S, R, C
 60
 61
 62def _compute_Y_tilde(Y, alpha):
 63    alpha_tilde = math.sqrt(alpha + 1) - 1
 64    Y_bar = np.mean(Y, axis=0)
 65    Y_tilde = Y + alpha_tilde * Y_bar
 66    return Y_tilde
 67
 68
 69def _compute_loading(M: np.ndarray, k: Union[int, None]) -> np.ndarray:
 70    if k is None:
 71        w, v = eigh(M)
 72        k = _estimate_k(w)
 73        w, v = w[-k:], v[:, -k:]
 74    else:
 75        w, v = eigh(M, subset_by_index=(M.shape[0] - k, M.shape[0] - 1))
 76    return v
 77
 78
 79def estimate_cov_Ri(
 80    Y: np.ndarray,
 81    F: np.ndarray,
 82    R: np.ndarray,
 83    C: np.ndarray,
 84    i: int,
 85    alpha: Union[float, int],
 86    m: int,
 87) -> np.ndarray:
 88    r"""
 89    Estimates the covariance matrix of the $i$-th row (0 indexed) of $\hat{R}$.
 90
 91    Parameters
 92    ----------
 93    Y : (T, p, q) ndarray
 94        The observed data.
 95    F : (T, k, r) ndarray
 96        The estimated factors.
 97    R : (p, k) ndarray
 98        The estimated front loading matrix.
 99    C : (q, r) ndarray
100        The estimated back loading matrix.
101    i : int
102        The row index.
103    alpha : float
104        The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`.
105    m : int
106        A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(qT)^{\frac{1}{4}} \rightarrow 0$.
107
108    Returns
109    -------
110    Sig_Ri : (k, k) ndarray
111        The estimated covariance matrix of the $i$-th row of $\hat{R}$.
112    """
113    if not (alpha >= -1):
114        raise ValueError("alpha must be greater than or equal to -1")
115    T, p, q = Y.shape
116    if m >= T:
117        warnings.warn("m is greater than T, unexpected results may occur.")
118    _, k, r = F.shape
119    E = Y - R @ F @ C.T
120    Y_tilde = _compute_Y_tilde(Y, alpha)
121    YYT = (Y_tilde @ Y_tilde.transpose(0, 2, 1)).sum(axis=0) / (p * q * T)
122    V, _ = eigh(YYT, subset_by_index=(p - k, p - 1))
123    V_inv = 1 / V
124    V_inv = np.diag(V_inv)
125    F_bar = np.mean(F, axis=0)
126    middle = _compute_D_Rnui(F, F_bar, C, E, alpha, 0, i, q)
127    for nu in range(1, m + 1):
128        D_Rnui = _compute_D_Rnui(F, F_bar, C, E, alpha, nu, i, q)
129        middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T)
130    Sig_Ri = V_inv @ middle @ V_inv
131    return Sig_Ri
132
133
134def _compute_D_Rnui(
135    F: np.ndarray,
136    F_bar: np.ndarray,
137    C: np.ndarray,
138    E: np.ndarray,
139    alpha: Union[float, int],
140    nu: int,
141    i: int,
142    q: int,
143):
144    T, k, r = F.shape
145    IaF = np.hstack([np.eye(k), alpha * F_bar])
146    if nu > 0:
147        F_nu = F[nu:]
148        F_0nu = F[:-nu]
149        E_nu = E[nu:]
150        E_0nu = E[:-nu]
151    else:
152        F_nu = F
153        F_0nu = F
154        E_nu = E
155        E_0nu = E
156    F_0nu_trans = F_0nu.transpose(0, 2, 1)
157    e_nui = E_nu[:, i]
158    e_0i = E_0nu[:, i]
159    e_outer = np.einsum("ti, tj->tij", e_nui, e_0i)
160    br = C.T @ e_outer @ C
161    bl = br @ F_0nu_trans
162    tr = F_nu @ br
163    tl = F_nu @ bl
164    block = np.block([[tl, tr], [bl, br]]).sum(axis=0)
165    block /= q * T
166    D_Rnui = IaF @ block @ IaF.T
167    return D_Rnui
168
169
170def estimate_cov_Cj(
171    Y: np.ndarray,
172    F: np.ndarray,
173    R: np.ndarray,
174    C: np.ndarray,
175    j: int,
176    alpha: Union[float, int],
177    m: int,
178) -> np.ndarray:
179    r"""
180    Estimates the covariance matrix of the $j$-th row (0 indexed) of $\hat{C}$.
181
182    Parameters
183    ----------
184    Y : (T, p, q) ndarray
185        The observed data.
186    F : (T, k, r) ndarray
187        The estimated factors.
188    R : (p, k) ndarray
189        The estimated front loading matrix.
190    C : (q, r) ndarray
191        The estimated back loading matrix.
192    j : int
193        The row index.
194    alpha : float
195        The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`.
196    m : int
197        A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(pT)^{\frac{1}{4}} \rightarrow 0$.
198
199    Returns
200    -------
201    Sig_Cj : (r, r) ndarray
202        The estimated covariance matrix of the $j$-th row of $\hat{C}$.
203    """
204    if not (alpha >= -1):
205        raise ValueError("alpha must be greater than or equal to -1")
206    T, p, q = Y.shape
207    if m >= T:
208        warnings.warn("m is greater than T, unexpected results may occur.")
209    _, k, r = F.shape
210    E = Y - R @ F @ C.T
211    Y_tilde = _compute_Y_tilde(Y, alpha)
212    YTY = (Y_tilde.transpose(0, 2, 1) @ Y_tilde).sum(axis=0) / (p * q * T)
213    V, _ = eigh(YTY, subset_by_index=(q - r, q - 1))
214    V_inv = 1 / V
215    V_inv = np.diag(V_inv)
216    F_bar = np.mean(F, axis=0)
217    middle = _compute_D_Cnuj(F, F_bar, R, E, alpha, 0, j, p)
218    for nu in range(1, m + 1):
219        D_Rnui = _compute_D_Cnuj(F, F_bar, R, E, alpha, nu, j, p)
220        middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T)
221    Sig_Ri = V_inv @ middle @ V_inv
222    return Sig_Ri
223
224
225def _compute_D_Cnuj(
226    F: np.ndarray,
227    F_bar: np.ndarray,
228    R: np.ndarray,
229    E: np.ndarray,
230    alpha: Union[float, int],
231    nu: int,
232    j: int,
233    p: int,
234):
235    T, k, r = F.shape
236    IaFt = np.hstack([np.eye(r), alpha * F_bar.T])
237    if nu > 0:
238        F_nu = F[nu:]
239        F_0nu = F[:-nu]
240        E_nu = E[nu:]
241        E_0nu = E[:-nu]
242    else:
243        F_nu = F
244        F_0nu = F
245        E_nu = E
246        E_0nu = E
247    F_nu_trans = F_nu.transpose(0, 2, 1)
248    e_nuj = E_nu[:, :, j]
249    e_0j = E_0nu[:, :, j]
250    e_outer = np.einsum("ti, tj->tij", e_nuj, e_0j)
251    br = R.T @ e_outer @ R
252    bl = br @ F_0nu
253    tr = F_nu_trans @ br
254    tl = tr @ F_0nu
255    block = np.block([[tl, tr], [bl, br]]).sum(axis=0)
256    block /= p * T
257    D_Cnuj = IaFt @ block @ IaFt.T
258    return D_Cnuj
def estimate_alpha_PCA( Y: numpy.ndarray, alpha: Union[float, int], k: Optional[int] = None, r: Optional[int] = None):
13def estimate_alpha_PCA(
14    Y: np.ndarray,
15    alpha: Union[float, int],
16    k: Union[int, None] = None,
17    r: Union[int, None] = None,
18):
19    r"""
20    Estimates the $\alpha$-PCA model in Chen and Fan 2021 (https://doi.org/10.1080/01621459.2021.1970569)
21    where $Y_t = R F_t C^T + E_t$
22
23    Parameters
24    ----------
25    Y : (T, p, q) ndarray
26        The observed data.
27    alpha : float
28        The hyperparameter $\alpha \geq -1$ that aims to balance information between first and second moments.
29    k : int, optional
30        Rank of the front loading matrix. If None, the rank is estimated.
31    r : int, optional
32        Rank of the back loading matrix. If None, the rank is estimated.
33
34    Returns
35    -------
36    F : (T, k, r) ndarray
37        The estimated factor matrix.
38    S : (T, p, q) ndarray
39        The estimated signal $S_t = R F_t C^T$.
40    R : (p, k) ndarray
41        The estimated front loading matrix.
42    C : (q, r) ndarray
43        The estimated back loading matrix.
44
45    """
46    if not (alpha >= -1):
47        raise ValueError("alpha must be greater than or equal to -1")
48    T, p, q = Y.shape
49    pqT = p * q * T
50    pq = p * q
51    Y_tilde = _compute_Y_tilde(Y, alpha)
52    M_R = np.einsum("tij,tkj->ik", Y_tilde, Y_tilde) / pqT
53    M_C = np.einsum("tji,tjk->ik", Y_tilde, Y_tilde) / pqT
54    R = _compute_loading(M_R, k)
55    R *= math.sqrt(p)
56    C = _compute_loading(M_C, r)
57    C *= math.sqrt(q)
58    F = R.T @ Y @ C / pq
59    S = R @ F @ C.T / pq
60    return F, S, R, C

Estimates the $\alpha$-PCA model in Chen and Fan 2021 (https://doi.org/10.1080/01621459.2021.1970569) where $Y_t = R F_t C^T + E_t$

Parameters
  • Y ((T, p, q) ndarray): The observed data.
  • alpha (float): The hyperparameter $\alpha \geq -1$ that aims to balance information between first and second moments.
  • k (int, optional): Rank of the front loading matrix. If None, the rank is estimated.
  • r (int, optional): Rank of the back loading matrix. If None, the rank is estimated.
Returns
  • F ((T, k, r) ndarray): The estimated factor matrix.
  • S ((T, p, q) ndarray): The estimated signal $S_t = R F_t C^T$.
  • R ((p, k) ndarray): The estimated front loading matrix.
  • C ((q, r) ndarray): The estimated back loading matrix.
def estimate_cov_Ri( Y: numpy.ndarray, F: numpy.ndarray, R: numpy.ndarray, C: numpy.ndarray, i: int, alpha: Union[float, int], m: int) -> numpy.ndarray:
 80def estimate_cov_Ri(
 81    Y: np.ndarray,
 82    F: np.ndarray,
 83    R: np.ndarray,
 84    C: np.ndarray,
 85    i: int,
 86    alpha: Union[float, int],
 87    m: int,
 88) -> np.ndarray:
 89    r"""
 90    Estimates the covariance matrix of the $i$-th row (0 indexed) of $\hat{R}$.
 91
 92    Parameters
 93    ----------
 94    Y : (T, p, q) ndarray
 95        The observed data.
 96    F : (T, k, r) ndarray
 97        The estimated factors.
 98    R : (p, k) ndarray
 99        The estimated front loading matrix.
100    C : (q, r) ndarray
101        The estimated back loading matrix.
102    i : int
103        The row index.
104    alpha : float
105        The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`.
106    m : int
107        A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(qT)^{\frac{1}{4}} \rightarrow 0$.
108
109    Returns
110    -------
111    Sig_Ri : (k, k) ndarray
112        The estimated covariance matrix of the $i$-th row of $\hat{R}$.
113    """
114    if not (alpha >= -1):
115        raise ValueError("alpha must be greater than or equal to -1")
116    T, p, q = Y.shape
117    if m >= T:
118        warnings.warn("m is greater than T, unexpected results may occur.")
119    _, k, r = F.shape
120    E = Y - R @ F @ C.T
121    Y_tilde = _compute_Y_tilde(Y, alpha)
122    YYT = (Y_tilde @ Y_tilde.transpose(0, 2, 1)).sum(axis=0) / (p * q * T)
123    V, _ = eigh(YYT, subset_by_index=(p - k, p - 1))
124    V_inv = 1 / V
125    V_inv = np.diag(V_inv)
126    F_bar = np.mean(F, axis=0)
127    middle = _compute_D_Rnui(F, F_bar, C, E, alpha, 0, i, q)
128    for nu in range(1, m + 1):
129        D_Rnui = _compute_D_Rnui(F, F_bar, C, E, alpha, nu, i, q)
130        middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T)
131    Sig_Ri = V_inv @ middle @ V_inv
132    return Sig_Ri

Estimates the covariance matrix of the $i$-th row (0 indexed) of $\hat{R}$.

Parameters
  • Y ((T, p, q) ndarray): The observed data.
  • F ((T, k, r) ndarray): The estimated factors.
  • R ((p, k) ndarray): The estimated front loading matrix.
  • C ((q, r) ndarray): The estimated back loading matrix.
  • i (int): The row index.
  • alpha (float): The hyperparameter $\alpha \geq -1$. Should be the same as the one used in estimate_alpha_PCA.
  • m (int): A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(qT)^{\frac{1}{4}} \rightarrow 0$.
Returns
  • Sig_Ri ((k, k) ndarray): The estimated covariance matrix of the $i$-th row of $\hat{R}$.
def estimate_cov_Cj( Y: numpy.ndarray, F: numpy.ndarray, R: numpy.ndarray, C: numpy.ndarray, j: int, alpha: Union[float, int], m: int) -> numpy.ndarray:
171def estimate_cov_Cj(
172    Y: np.ndarray,
173    F: np.ndarray,
174    R: np.ndarray,
175    C: np.ndarray,
176    j: int,
177    alpha: Union[float, int],
178    m: int,
179) -> np.ndarray:
180    r"""
181    Estimates the covariance matrix of the $j$-th row (0 indexed) of $\hat{C}$.
182
183    Parameters
184    ----------
185    Y : (T, p, q) ndarray
186        The observed data.
187    F : (T, k, r) ndarray
188        The estimated factors.
189    R : (p, k) ndarray
190        The estimated front loading matrix.
191    C : (q, r) ndarray
192        The estimated back loading matrix.
193    j : int
194        The row index.
195    alpha : float
196        The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`.
197    m : int
198        A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(pT)^{\frac{1}{4}} \rightarrow 0$.
199
200    Returns
201    -------
202    Sig_Cj : (r, r) ndarray
203        The estimated covariance matrix of the $j$-th row of $\hat{C}$.
204    """
205    if not (alpha >= -1):
206        raise ValueError("alpha must be greater than or equal to -1")
207    T, p, q = Y.shape
208    if m >= T:
209        warnings.warn("m is greater than T, unexpected results may occur.")
210    _, k, r = F.shape
211    E = Y - R @ F @ C.T
212    Y_tilde = _compute_Y_tilde(Y, alpha)
213    YTY = (Y_tilde.transpose(0, 2, 1) @ Y_tilde).sum(axis=0) / (p * q * T)
214    V, _ = eigh(YTY, subset_by_index=(q - r, q - 1))
215    V_inv = 1 / V
216    V_inv = np.diag(V_inv)
217    F_bar = np.mean(F, axis=0)
218    middle = _compute_D_Cnuj(F, F_bar, R, E, alpha, 0, j, p)
219    for nu in range(1, m + 1):
220        D_Rnui = _compute_D_Cnuj(F, F_bar, R, E, alpha, nu, j, p)
221        middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T)
222    Sig_Ri = V_inv @ middle @ V_inv
223    return Sig_Ri

Estimates the covariance matrix of the $j$-th row (0 indexed) of $\hat{C}$.

Parameters
  • Y ((T, p, q) ndarray): The observed data.
  • F ((T, k, r) ndarray): The estimated factors.
  • R ((p, k) ndarray): The estimated front loading matrix.
  • C ((q, r) ndarray): The estimated back loading matrix.
  • j (int): The row index.
  • alpha (float): The hyperparameter $\alpha \geq -1$. Should be the same as the one used in estimate_alpha_PCA.
  • m (int): A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(pT)^{\frac{1}{4}} \rightarrow 0$.
Returns
  • Sig_Cj ((r, r) ndarray): The estimated covariance matrix of the $j$-th row of $\hat{C}$.