Source code for aomodel.pca

import numpy as np

# Approved for public release; distribution is unlimited. Public Affairs release approval # AFRL-2026-1309.


[docs]
def find_top_principal_components(pc_variances, percent_variance):
    """
    Finds the top principal components containing the given percentage of the total variance.

    Args:
        pc_variances (ndarray): numpy 1-D array containing the variance of each principal component.
        percent_variance (float): percentage of the total variance to look for.

    Returns:
        **num_components** (*int*) -- the number of top principal components containing the given percentage of the
        total variance.
    """
    assert ((percent_variance > 0) and (percent_variance <= 1.0))
    assert ((pc_variances.ndim == 1) and (pc_variances >= 0).all())
    assert np.all(np.diff(pc_variances) <= 1e-14)    # Ensure that the variances are in descending order

    total_variance = pc_variances.sum()
    threshold_variance = total_variance * percent_variance

    # Find the first principal component for which the cumulative sum is at least the given percentage of the total sum:
    cumulative_variance = np.cumsum(pc_variances)
    threshold_variance_index = int(np.searchsorted(cumulative_variance, threshold_variance, side='left'))

    # Handle the case that you need all principal components to cover the given percent_variance:
    num_components = min(threshold_variance_index + 1, len(pc_variances))

    return num_components





[docs]
def compute_pca(data):
    """
    Computes the principal components and their associated variances for an input array containing samples of a
    multivariate Gaussian distribution. Takes the Singular Value Decomposition (SVD) of the covariance matrix.

    Args:
        data (ndarray): numpy 2-D array of shape (vector dimensionality, number of samples) containing samples
            of the multivariable Gaussian distribution.

    Returns:
        - **data_mean** (*ndarray*) -- numpy 1-D array of shape (vector dimensionality,) containing the sample mean
          vector (i.e., the sample mean of each vector component).
        - **principal_components** (*ndarray*) -- numpy 2-D array of shape (vector dimensionality, vector
          dimensionality) containing the principal component matrix. The principal components are the columns of this
          matrix.
        - **pc_variances** (*ndarray*) -- numpy 1-D array of shape (vector dimensionality,) containing the variance of
          each principal component.
    """
    assert (data.ndim == 2)

    if np.isnan(data).any():
        raise ValueError("Input data contains NaN values, which are not supported.")

    # Compute and remove mean vector
    data_mean = np.average(data, axis=1)
    data_mean_removed = data - data_mean[:, np.newaxis]

    # Estimates the covariance matrix of the distribution:
    covariance_estimate = np.dot(data_mean_removed, data_mean_removed.T) / data_mean_removed.shape[1]

    # Compute SVD to find principal components and their variances:
    principal_components, pc_variances = np.linalg.svd(covariance_estimate)[:2]

    return data_mean, principal_components, pc_variances




[docs]
def generative_pca_algorithm(num_samples, covariance_modulation_matrix, mean_vector=None):
    """
    Generates samples from a multivariate Gaussian distribution. This uses the PCA generative algorithm, which
    generates white noise vectors and then (1) multiplies them by a modulation matrix to set the covariance matrix and
    (2) adds the mean vectors.

    Args:
        num_samples (int): number of samples to generate.
        covariance_modulation_matrix (ndarray): numpy 2-D array of shape (random vector dimensionality, random vector
            dimensionality) containing a matrix which scales unit-variance white noise to have the desired spatial
            covariance matrix. This matrix is determined by the matrices of principal components their variances.
        mean_vector (ndarray, optional): [Default=None] numpy 1-D array of shape (random vector dimensionality,)
            containing the mean of the distribution.

            - If set to None, a mean vector of zero is used.

    Returns:
        **samples** (*ndarray*) -- 2-D array of shape (random vector dimensionality, num_samples) whose columns contain
        samples from the desired multivariate Gaussian distribution.
    """
    assert (num_samples > 0)
    assert (covariance_modulation_matrix.ndim == 2)
    assert (covariance_modulation_matrix.shape[0] == covariance_modulation_matrix.shape[1])

    if mean_vector is not None:
        assert ((mean_vector.ndim == 1) and (mean_vector.shape[0] == covariance_modulation_matrix.shape[0]))
    else:
        mean_vector = np.zeros(covariance_modulation_matrix.shape[0])

    # Gaussian i.i.d random variables (mean 0, variance 1):
    white_noise = np.random.normal(size=(covariance_modulation_matrix.shape[0], num_samples))

    # Samples from the distribution
    samples = np.dot(covariance_modulation_matrix, white_noise) + mean_vector[:, np.newaxis]

    return samples