Source code for aomodel.pca
import numpy as np
# Approved for public release; distribution is unlimited. Public Affairs release approval # AFRL-2026-1309.
[docs]
def find_top_principal_components(pc_variances, percent_variance):
"""
Finds the top principal components containing the given percentage of the total variance.
Args:
pc_variances (ndarray): numpy 1-D array containing the variance of each principal component.
percent_variance (float): percentage of the total variance to look for.
Returns:
**num_components** (*int*) -- the number of top principal components containing the given percentage of the
total variance.
"""
assert ((percent_variance > 0) and (percent_variance <= 1.0))
assert ((pc_variances.ndim == 1) and (pc_variances >= 0).all())
assert np.all(np.diff(pc_variances) <= 1e-14) # Ensure that the variances are in descending order
total_variance = pc_variances.sum()
threshold_variance = total_variance * percent_variance
# Find the first principal component for which the cumulative sum is at least the given percentage of the total sum:
cumulative_variance = np.cumsum(pc_variances)
threshold_variance_index = int(np.searchsorted(cumulative_variance, threshold_variance, side='left'))
# Handle the case that you need all principal components to cover the given percent_variance:
num_components = min(threshold_variance_index + 1, len(pc_variances))
return num_components
[docs]
def compute_pca(data):
"""
Computes the principal components and their associated variances for an input array containing samples of a
multivariate Gaussian distribution. Takes the Singular Value Decomposition (SVD) of the covariance matrix.
Args:
data (ndarray): numpy 2-D array of shape (vector dimensionality, number of samples) containing samples
of the multivariable Gaussian distribution.
Returns:
- **data_mean** (*ndarray*) -- numpy 1-D array of shape (vector dimensionality,) containing the sample mean
vector (i.e., the sample mean of each vector component).
- **principal_components** (*ndarray*) -- numpy 2-D array of shape (vector dimensionality, vector
dimensionality) containing the principal component matrix. The principal components are the columns of this
matrix.
- **pc_variances** (*ndarray*) -- numpy 1-D array of shape (vector dimensionality,) containing the variance of
each principal component.
"""
assert (data.ndim == 2)
if np.isnan(data).any():
raise ValueError("Input data contains NaN values, which are not supported.")
# Compute and remove mean vector
data_mean = np.average(data, axis=1)
data_mean_removed = data - data_mean[:, np.newaxis]
# Estimates the covariance matrix of the distribution:
covariance_estimate = np.dot(data_mean_removed, data_mean_removed.T) / data_mean_removed.shape[1]
# Compute SVD to find principal components and their variances:
principal_components, pc_variances = np.linalg.svd(covariance_estimate)[:2]
return data_mean, principal_components, pc_variances
[docs]
def generative_pca_algorithm(num_samples, covariance_modulation_matrix, mean_vector=None):
"""
Generates samples from a multivariate Gaussian distribution. This uses the PCA generative algorithm, which
generates white noise vectors and then (1) multiplies them by a modulation matrix to set the covariance matrix and
(2) adds the mean vectors.
Args:
num_samples (int): number of samples to generate.
covariance_modulation_matrix (ndarray): numpy 2-D array of shape (random vector dimensionality, random vector
dimensionality) containing a matrix which scales unit-variance white noise to have the desired spatial
covariance matrix. This matrix is determined by the matrices of principal components their variances.
mean_vector (ndarray, optional): [Default=None] numpy 1-D array of shape (random vector dimensionality,)
containing the mean of the distribution.
- If set to None, a mean vector of zero is used.
Returns:
**samples** (*ndarray*) -- 2-D array of shape (random vector dimensionality, num_samples) whose columns contain
samples from the desired multivariate Gaussian distribution.
"""
assert (num_samples > 0)
assert (covariance_modulation_matrix.ndim == 2)
assert (covariance_modulation_matrix.shape[0] == covariance_modulation_matrix.shape[1])
if mean_vector is not None:
assert ((mean_vector.ndim == 1) and (mean_vector.shape[0] == covariance_modulation_matrix.shape[0]))
else:
mean_vector = np.zeros(covariance_modulation_matrix.shape[0])
# Gaussian i.i.d random variables (mean 0, variance 1):
white_noise = np.random.normal(size=(covariance_modulation_matrix.shape[0], num_samples))
# Samples from the distribution
samples = np.dot(covariance_modulation_matrix, white_noise) + mean_vector[:, np.newaxis]
return samples