from typing import Tuple, Union
import numpy as np
from scipy.linalg import sqrtm
__all__ = [
"contraction_matrix",
"canonical_corr",
"cca"
]
[docs]
def contraction_matrix(
X: np.ndarray,
Y: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Returns the contraction matrix as well as the matricial square-root of the covariance matrices of data `X` and `Y`.
Parameters
----------
X : np.ndarray
Data.
Y : np.ndarray
Other data.
Returns
-------
np.ndarray
Contraction matrix.
np.ndarray
Matricial square-root of `X` covariance matrix.
np.ndarray
Matricial square-root of `Y` covariance matrix.
"""
# Number of pixels
L = X.shape[0]
# Centering X and Y
Xc = X - np.mean(X, 0)
Yc = Y - np.mean(Y, 0)
# Unbiased covariance and crosses covariance matrices of X and Y
R_XX = Xc.T @ Xc / (L-1)
R_YY = Yc.T @ Yc / (L-1)
R_XY = Xc.T @ Yc / (L-1)
# Matricial square-root of covariance matrices
R_XX_Pud = sqrtm(R_XX)
R_YY_Pud = sqrtm(R_YY)
# Contraction matrix (which singular values are the canonical correlations)
M = np.linalg.solve(R_XX_Pud, R_XY) @ np.linalg.inv(R_YY_Pud)
return M, R_XX_Pud, R_YY_Pud
[docs]
def canonical_corr(X: np.ndarray, Y: np.ndarray, max: bool=True) -> Union[float, np.ndarray]:
"""
Returns the canonical correlation coefficient of data X and Y.
If `max` is False, returns all the singular values in decreasing order. These coefficients can be use for example to compute mutual information in the multivariate Gaussian case.
Parameters
----------
X : np.ndarray
Data.
Y : np.ndarray
Other data.
max : bool, optional
If True, the function returns the main canonical correlation coefficient. Else, it returns all the coefficient in decreasing order. Default True.
Returns
-------
Union[float, np.ndarray]
Main canonical correlation coefficient or all singular values in decreasing order.
"""
# Contraction matrix (which singular values are the canonical correlations)
M, _, __ = contraction_matrix(X, Y)
# Singular values decomposition
_, S, __ = np.linalg.svd(M)
if max:
return S[0] # S is already sorted in decreasing order
return S
[docs]
def cca(X: np.ndarray, Y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""
Canonical correlation analysis.
Parameters
----------
X : np.ndarray
Data.
Y : np.ndarray
Other data.
Returns
-------
np.ndarray
`X` linear combination coefficients.
np.ndarray
`Y` linear combination coefficients.
np.ndarray
Main canonical correlation coefficient.
"""
if X.ndim == 1:
X = X.reshape(-1, 1)
if Y.ndim == 1:
Y = Y.reshape(-1, 1)
# Contraction matrix (which singular values are the canonical correlations)
M, R_XX_Pud, R_YY_Pud = contraction_matrix(X, Y)
# Décomposition en valeurs singulières pour avoir les correlations canoniques
UX, S, UY = np.linalg.svd(M) # Vérifier l'ordre des outputs
# Projection coefficients
JX = np.linalg.solve(R_XX_Pud, UX[:, 0])
JY = np.linalg.solve(R_YY_Pud, UY[:, 0])
# Constraint of sum to 1 over the coefficient
JX /= abs(JX).sum()
JY /= abs(JY).sum()
return JX, JY, S[0]