mavats.alphaPCA
1import math 2import warnings 3from typing import Tuple, Union 4 5import numpy as np 6from numpy import linalg as LA 7from scipy.linalg import eigh 8 9from mavats.factormodel import _estimate_k 10 11 12def estimate_alpha_PCA( 13 Y: np.ndarray, 14 alpha: Union[float, int], 15 k: Union[int, None] = None, 16 r: Union[int, None] = None, 17): 18 r""" 19 Estimates the $\alpha$-PCA model in Chen and Fan 2021 (https://doi.org/10.1080/01621459.2021.1970569) 20 where $Y_t = R F_t C^T + E_t$ 21 22 Parameters 23 ---------- 24 Y : (T, p, q) ndarray 25 The observed data. 26 alpha : float 27 The hyperparameter $\alpha \geq -1$ that aims to balance information between first and second moments. 28 k : int, optional 29 Rank of the front loading matrix. If None, the rank is estimated. 30 r : int, optional 31 Rank of the back loading matrix. If None, the rank is estimated. 32 33 Returns 34 ------- 35 F : (T, k, r) ndarray 36 The estimated factor matrix. 37 S : (T, p, q) ndarray 38 The estimated signal $S_t = R F_t C^T$. 39 R : (p, k) ndarray 40 The estimated front loading matrix. 41 C : (q, r) ndarray 42 The estimated back loading matrix. 43 44 """ 45 if not (alpha >= -1): 46 raise ValueError("alpha must be greater than or equal to -1") 47 T, p, q = Y.shape 48 pqT = p * q * T 49 pq = p * q 50 Y_tilde = _compute_Y_tilde(Y, alpha) 51 M_R = np.einsum("tij,tkj->ik", Y_tilde, Y_tilde) / pqT 52 M_C = np.einsum("tji,tjk->ik", Y_tilde, Y_tilde) / pqT 53 R = _compute_loading(M_R, k) 54 R *= math.sqrt(p) 55 C = _compute_loading(M_C, r) 56 C *= math.sqrt(q) 57 F = R.T @ Y @ C / pq 58 S = R @ F @ C.T / pq 59 return F, S, R, C 60 61 62def _compute_Y_tilde(Y, alpha): 63 alpha_tilde = math.sqrt(alpha + 1) - 1 64 Y_bar = np.mean(Y, axis=0) 65 Y_tilde = Y + alpha_tilde * Y_bar 66 return Y_tilde 67 68 69def _compute_loading(M: np.ndarray, k: Union[int, None]) -> np.ndarray: 70 if k is None: 71 w, v = eigh(M) 72 k = _estimate_k(w) 73 w, v = w[-k:], v[:, -k:] 74 else: 75 w, v = eigh(M, subset_by_index=(M.shape[0] - k, M.shape[0] - 1)) 76 return v 77 78 79def estimate_cov_Ri( 80 Y: np.ndarray, 81 F: np.ndarray, 82 R: np.ndarray, 83 C: np.ndarray, 84 i: int, 85 alpha: Union[float, int], 86 m: int, 87) -> np.ndarray: 88 r""" 89 Estimates the covariance matrix of the $i$-th row (0 indexed) of $\hat{R}$. 90 91 Parameters 92 ---------- 93 Y : (T, p, q) ndarray 94 The observed data. 95 F : (T, k, r) ndarray 96 The estimated factors. 97 R : (p, k) ndarray 98 The estimated front loading matrix. 99 C : (q, r) ndarray 100 The estimated back loading matrix. 101 i : int 102 The row index. 103 alpha : float 104 The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`. 105 m : int 106 A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(qT)^{\frac{1}{4}} \rightarrow 0$. 107 108 Returns 109 ------- 110 Sig_Ri : (k, k) ndarray 111 The estimated covariance matrix of the $i$-th row of $\hat{R}$. 112 """ 113 if not (alpha >= -1): 114 raise ValueError("alpha must be greater than or equal to -1") 115 T, p, q = Y.shape 116 if m >= T: 117 warnings.warn("m is greater than T, unexpected results may occur.") 118 _, k, r = F.shape 119 E = Y - R @ F @ C.T 120 Y_tilde = _compute_Y_tilde(Y, alpha) 121 YYT = (Y_tilde @ Y_tilde.transpose(0, 2, 1)).sum(axis=0) / (p * q * T) 122 V, _ = eigh(YYT, subset_by_index=(p - k, p - 1)) 123 V_inv = 1 / V 124 V_inv = np.diag(V_inv) 125 F_bar = np.mean(F, axis=0) 126 middle = _compute_D_Rnui(F, F_bar, C, E, alpha, 0, i, q) 127 for nu in range(1, m + 1): 128 D_Rnui = _compute_D_Rnui(F, F_bar, C, E, alpha, nu, i, q) 129 middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T) 130 Sig_Ri = V_inv @ middle @ V_inv 131 return Sig_Ri 132 133 134def _compute_D_Rnui( 135 F: np.ndarray, 136 F_bar: np.ndarray, 137 C: np.ndarray, 138 E: np.ndarray, 139 alpha: Union[float, int], 140 nu: int, 141 i: int, 142 q: int, 143): 144 T, k, r = F.shape 145 IaF = np.hstack([np.eye(k), alpha * F_bar]) 146 if nu > 0: 147 F_nu = F[nu:] 148 F_0nu = F[:-nu] 149 E_nu = E[nu:] 150 E_0nu = E[:-nu] 151 else: 152 F_nu = F 153 F_0nu = F 154 E_nu = E 155 E_0nu = E 156 F_0nu_trans = F_0nu.transpose(0, 2, 1) 157 e_nui = E_nu[:, i] 158 e_0i = E_0nu[:, i] 159 e_outer = np.einsum("ti, tj->tij", e_nui, e_0i) 160 br = C.T @ e_outer @ C 161 bl = br @ F_0nu_trans 162 tr = F_nu @ br 163 tl = F_nu @ bl 164 block = np.block([[tl, tr], [bl, br]]).sum(axis=0) 165 block /= q * T 166 D_Rnui = IaF @ block @ IaF.T 167 return D_Rnui 168 169 170def estimate_cov_Cj( 171 Y: np.ndarray, 172 F: np.ndarray, 173 R: np.ndarray, 174 C: np.ndarray, 175 j: int, 176 alpha: Union[float, int], 177 m: int, 178) -> np.ndarray: 179 r""" 180 Estimates the covariance matrix of the $j$-th row (0 indexed) of $\hat{C}$. 181 182 Parameters 183 ---------- 184 Y : (T, p, q) ndarray 185 The observed data. 186 F : (T, k, r) ndarray 187 The estimated factors. 188 R : (p, k) ndarray 189 The estimated front loading matrix. 190 C : (q, r) ndarray 191 The estimated back loading matrix. 192 j : int 193 The row index. 194 alpha : float 195 The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`. 196 m : int 197 A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(pT)^{\frac{1}{4}} \rightarrow 0$. 198 199 Returns 200 ------- 201 Sig_Cj : (r, r) ndarray 202 The estimated covariance matrix of the $j$-th row of $\hat{C}$. 203 """ 204 if not (alpha >= -1): 205 raise ValueError("alpha must be greater than or equal to -1") 206 T, p, q = Y.shape 207 if m >= T: 208 warnings.warn("m is greater than T, unexpected results may occur.") 209 _, k, r = F.shape 210 E = Y - R @ F @ C.T 211 Y_tilde = _compute_Y_tilde(Y, alpha) 212 YTY = (Y_tilde.transpose(0, 2, 1) @ Y_tilde).sum(axis=0) / (p * q * T) 213 V, _ = eigh(YTY, subset_by_index=(q - r, q - 1)) 214 V_inv = 1 / V 215 V_inv = np.diag(V_inv) 216 F_bar = np.mean(F, axis=0) 217 middle = _compute_D_Cnuj(F, F_bar, R, E, alpha, 0, j, p) 218 for nu in range(1, m + 1): 219 D_Rnui = _compute_D_Cnuj(F, F_bar, R, E, alpha, nu, j, p) 220 middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T) 221 Sig_Ri = V_inv @ middle @ V_inv 222 return Sig_Ri 223 224 225def _compute_D_Cnuj( 226 F: np.ndarray, 227 F_bar: np.ndarray, 228 R: np.ndarray, 229 E: np.ndarray, 230 alpha: Union[float, int], 231 nu: int, 232 j: int, 233 p: int, 234): 235 T, k, r = F.shape 236 IaFt = np.hstack([np.eye(r), alpha * F_bar.T]) 237 if nu > 0: 238 F_nu = F[nu:] 239 F_0nu = F[:-nu] 240 E_nu = E[nu:] 241 E_0nu = E[:-nu] 242 else: 243 F_nu = F 244 F_0nu = F 245 E_nu = E 246 E_0nu = E 247 F_nu_trans = F_nu.transpose(0, 2, 1) 248 e_nuj = E_nu[:, :, j] 249 e_0j = E_0nu[:, :, j] 250 e_outer = np.einsum("ti, tj->tij", e_nuj, e_0j) 251 br = R.T @ e_outer @ R 252 bl = br @ F_0nu 253 tr = F_nu_trans @ br 254 tl = tr @ F_0nu 255 block = np.block([[tl, tr], [bl, br]]).sum(axis=0) 256 block /= p * T 257 D_Cnuj = IaFt @ block @ IaFt.T 258 return D_Cnuj
def
estimate_alpha_PCA( Y: numpy.ndarray, alpha: Union[float, int], k: Optional[int] = None, r: Optional[int] = None):
13def estimate_alpha_PCA( 14 Y: np.ndarray, 15 alpha: Union[float, int], 16 k: Union[int, None] = None, 17 r: Union[int, None] = None, 18): 19 r""" 20 Estimates the $\alpha$-PCA model in Chen and Fan 2021 (https://doi.org/10.1080/01621459.2021.1970569) 21 where $Y_t = R F_t C^T + E_t$ 22 23 Parameters 24 ---------- 25 Y : (T, p, q) ndarray 26 The observed data. 27 alpha : float 28 The hyperparameter $\alpha \geq -1$ that aims to balance information between first and second moments. 29 k : int, optional 30 Rank of the front loading matrix. If None, the rank is estimated. 31 r : int, optional 32 Rank of the back loading matrix. If None, the rank is estimated. 33 34 Returns 35 ------- 36 F : (T, k, r) ndarray 37 The estimated factor matrix. 38 S : (T, p, q) ndarray 39 The estimated signal $S_t = R F_t C^T$. 40 R : (p, k) ndarray 41 The estimated front loading matrix. 42 C : (q, r) ndarray 43 The estimated back loading matrix. 44 45 """ 46 if not (alpha >= -1): 47 raise ValueError("alpha must be greater than or equal to -1") 48 T, p, q = Y.shape 49 pqT = p * q * T 50 pq = p * q 51 Y_tilde = _compute_Y_tilde(Y, alpha) 52 M_R = np.einsum("tij,tkj->ik", Y_tilde, Y_tilde) / pqT 53 M_C = np.einsum("tji,tjk->ik", Y_tilde, Y_tilde) / pqT 54 R = _compute_loading(M_R, k) 55 R *= math.sqrt(p) 56 C = _compute_loading(M_C, r) 57 C *= math.sqrt(q) 58 F = R.T @ Y @ C / pq 59 S = R @ F @ C.T / pq 60 return F, S, R, C
Estimates the $\alpha$-PCA model in Chen and Fan 2021 (https://doi.org/10.1080/01621459.2021.1970569) where $Y_t = R F_t C^T + E_t$
Parameters
- Y ((T, p, q) ndarray): The observed data.
- alpha (float): The hyperparameter $\alpha \geq -1$ that aims to balance information between first and second moments.
- k (int, optional): Rank of the front loading matrix. If None, the rank is estimated.
- r (int, optional): Rank of the back loading matrix. If None, the rank is estimated.
Returns
- F ((T, k, r) ndarray): The estimated factor matrix.
- S ((T, p, q) ndarray): The estimated signal $S_t = R F_t C^T$.
- R ((p, k) ndarray): The estimated front loading matrix.
- C ((q, r) ndarray): The estimated back loading matrix.
def
estimate_cov_Ri( Y: numpy.ndarray, F: numpy.ndarray, R: numpy.ndarray, C: numpy.ndarray, i: int, alpha: Union[float, int], m: int) -> numpy.ndarray:
80def estimate_cov_Ri( 81 Y: np.ndarray, 82 F: np.ndarray, 83 R: np.ndarray, 84 C: np.ndarray, 85 i: int, 86 alpha: Union[float, int], 87 m: int, 88) -> np.ndarray: 89 r""" 90 Estimates the covariance matrix of the $i$-th row (0 indexed) of $\hat{R}$. 91 92 Parameters 93 ---------- 94 Y : (T, p, q) ndarray 95 The observed data. 96 F : (T, k, r) ndarray 97 The estimated factors. 98 R : (p, k) ndarray 99 The estimated front loading matrix. 100 C : (q, r) ndarray 101 The estimated back loading matrix. 102 i : int 103 The row index. 104 alpha : float 105 The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`. 106 m : int 107 A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(qT)^{\frac{1}{4}} \rightarrow 0$. 108 109 Returns 110 ------- 111 Sig_Ri : (k, k) ndarray 112 The estimated covariance matrix of the $i$-th row of $\hat{R}$. 113 """ 114 if not (alpha >= -1): 115 raise ValueError("alpha must be greater than or equal to -1") 116 T, p, q = Y.shape 117 if m >= T: 118 warnings.warn("m is greater than T, unexpected results may occur.") 119 _, k, r = F.shape 120 E = Y - R @ F @ C.T 121 Y_tilde = _compute_Y_tilde(Y, alpha) 122 YYT = (Y_tilde @ Y_tilde.transpose(0, 2, 1)).sum(axis=0) / (p * q * T) 123 V, _ = eigh(YYT, subset_by_index=(p - k, p - 1)) 124 V_inv = 1 / V 125 V_inv = np.diag(V_inv) 126 F_bar = np.mean(F, axis=0) 127 middle = _compute_D_Rnui(F, F_bar, C, E, alpha, 0, i, q) 128 for nu in range(1, m + 1): 129 D_Rnui = _compute_D_Rnui(F, F_bar, C, E, alpha, nu, i, q) 130 middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T) 131 Sig_Ri = V_inv @ middle @ V_inv 132 return Sig_Ri
Estimates the covariance matrix of the $i$-th row (0 indexed) of $\hat{R}$.
Parameters
- Y ((T, p, q) ndarray): The observed data.
- F ((T, k, r) ndarray): The estimated factors.
- R ((p, k) ndarray): The estimated front loading matrix.
- C ((q, r) ndarray): The estimated back loading matrix.
- i (int): The row index.
- alpha (float):
The hyperparameter $\alpha \geq -1$. Should be the same as the one used in
estimate_alpha_PCA
. - m (int): A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(qT)^{\frac{1}{4}} \rightarrow 0$.
Returns
- Sig_Ri ((k, k) ndarray): The estimated covariance matrix of the $i$-th row of $\hat{R}$.
def
estimate_cov_Cj( Y: numpy.ndarray, F: numpy.ndarray, R: numpy.ndarray, C: numpy.ndarray, j: int, alpha: Union[float, int], m: int) -> numpy.ndarray:
171def estimate_cov_Cj( 172 Y: np.ndarray, 173 F: np.ndarray, 174 R: np.ndarray, 175 C: np.ndarray, 176 j: int, 177 alpha: Union[float, int], 178 m: int, 179) -> np.ndarray: 180 r""" 181 Estimates the covariance matrix of the $j$-th row (0 indexed) of $\hat{C}$. 182 183 Parameters 184 ---------- 185 Y : (T, p, q) ndarray 186 The observed data. 187 F : (T, k, r) ndarray 188 The estimated factors. 189 R : (p, k) ndarray 190 The estimated front loading matrix. 191 C : (q, r) ndarray 192 The estimated back loading matrix. 193 j : int 194 The row index. 195 alpha : float 196 The hyperparameter $\alpha \geq -1$. Should be the same as the one used in `estimate_alpha_PCA`. 197 m : int 198 A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(pT)^{\frac{1}{4}} \rightarrow 0$. 199 200 Returns 201 ------- 202 Sig_Cj : (r, r) ndarray 203 The estimated covariance matrix of the $j$-th row of $\hat{C}$. 204 """ 205 if not (alpha >= -1): 206 raise ValueError("alpha must be greater than or equal to -1") 207 T, p, q = Y.shape 208 if m >= T: 209 warnings.warn("m is greater than T, unexpected results may occur.") 210 _, k, r = F.shape 211 E = Y - R @ F @ C.T 212 Y_tilde = _compute_Y_tilde(Y, alpha) 213 YTY = (Y_tilde.transpose(0, 2, 1) @ Y_tilde).sum(axis=0) / (p * q * T) 214 V, _ = eigh(YTY, subset_by_index=(q - r, q - 1)) 215 V_inv = 1 / V 216 V_inv = np.diag(V_inv) 217 F_bar = np.mean(F, axis=0) 218 middle = _compute_D_Cnuj(F, F_bar, R, E, alpha, 0, j, p) 219 for nu in range(1, m + 1): 220 D_Rnui = _compute_D_Cnuj(F, F_bar, R, E, alpha, nu, j, p) 221 middle += (1 - nu / (m + 1)) * (D_Rnui + D_Rnui.T) 222 Sig_Ri = V_inv @ middle @ V_inv 223 return Sig_Ri
Estimates the covariance matrix of the $j$-th row (0 indexed) of $\hat{C}$.
Parameters
- Y ((T, p, q) ndarray): The observed data.
- F ((T, k, r) ndarray): The estimated factors.
- R ((p, k) ndarray): The estimated front loading matrix.
- C ((q, r) ndarray): The estimated back loading matrix.
- j (int): The row index.
- alpha (float):
The hyperparameter $\alpha \geq -1$. Should be the same as the one used in
estimate_alpha_PCA
. - m (int): A tuning parameter. Theoretically, it satisfies $m \rightarrow \infty$ and $m/(pT)^{\frac{1}{4}} \rightarrow 0$.
Returns
- Sig_Cj ((r, r) ndarray): The estimated covariance matrix of the $j$-th row of $\hat{C}$.