Source code for aomodel.revar

import numpy as np
import aomodel.utils as utils
import aomodel.pca as pca
from aomodel.long_range_ar import LongRangeAR
import time
from datetime import timedelta
import os
import warnings

# Approved for public release; distribution is unlimited. Public Affairs release approval # AFRL-2026-1309.


[docs]
class ReVAR(LongRangeAR):
    """
    Implements the **Re-whitened Vector AutoRegression (ReVAR)** algorithm to generate synthetic time series of images
    with the same spatial and temporal statistics as an input data set. For a thorough description of this model, see
    :ref:`Theory`.

    Args:
        data_mask (ndarray, optional): [Default=None] numpy 2-D boolean array of shape (image height, image width)
            indicating which 2-D indices of each time-step correspond to valid pixel values.

            - If set to None, then ``load_file`` must be provided. In this case, the class loads the mask from the file.

        time_lags (Union[int, list, ndarray], optional): [Default=1] either an integer, list, or numpy 1-D array
            indicating the time lags to use for the model.

            - If ``time_lags`` is an integer, the function uses the previous time-steps up to this integer value.

            - If ``time_lags`` is a list or numpy 1-D array, the model uses these time lags.

        num_low_pass_filters (int, optional): [Default=0] the number of low-pass filters to use in the linear time
            predictive model.

        load_file (str, optional): [Default=None] directory to a file from which the model's instance variables can
            be loaded.

        prediction_window_structure (PredictionWindowStructure, optional): [Default=None] an instance of a sub-class
                of ``PredictionWindowStructure`` implementing get_mask(). Passed as input to the constructor of
                ``LongRangeAR``.

                - If set to None, the method builds a prediction window structure from `**kwargs`.

        **kwargs (dict, optional): Keyword arguments passed to ``LongRangeAR``.

            - **prediction_subspace_dimension** (*int*) -- number of (top) vector components to use for linear time
              prediction.

            - **low_pass_filter_params** (*ndarray*) -- numpy 1-D array of length ``num_low_pass_filters`` containing
              the parameters of each low-pass filter.
    """

    def __init__(self, data_mask=None, time_lags=1, num_low_pass_filters=0, load_file=None,
                 prediction_window_structure=None, **kwargs):
        if data_mask is None:
            assert (load_file is not None)
            num_pixels = None
        else:
            assert (data_mask.dtype == bool)
            assert (data_mask.ndim == 2)
            num_pixels = data_mask.sum()

        # Initializes instance variables:
        self.data_mask = data_mask
        self.principal_components = None
        self.pc_variances = None
        self.standard_deviation_vector = None
        self.mean_vector = None

        print("\nInitializing ReVAR model...")
        super().__init__(vector_dimensionality=num_pixels, time_lags=time_lags,
                         num_low_pass_filters=num_low_pass_filters, load_file=load_file,
                         prediction_window_structure=prediction_window_structure, print_statements=False, **kwargs)

        if load_file is None:
            print("ReVAR model initialized. Use ReVAR.fit() to fit this model to data or ReVAR.load() to load a "
                  "pre-trained model.\n")



[docs]
    def fit(self, training_data, percent_variance=None, cutoff_frequency=None, tps_block_size=None):
        """
        Estimates the parameters of **ReVAR** from training data. Uses ``LongRangeAR.fit`` to compute the prediction
        weights, low-pass filter parameters (if used), and noise modulation matrix.

        Args:
            training_data (ndarray): numpy 3-D array of shape (number of images, image height, image width) containing
                the data to fit.

            percent_variance (float, optional): [Default=None] percent variance of the subspace to use for linear time
                prediction in the **Long-Range AR** model.

                - If set to None, linear prediction is applied to all principal coefficients.

            cutoff_frequency (float, optional): [Default=None] cutoff frequency (in units of cycles/sample) to use for
                estimation of the low-pass filter parameters.

                - If set to None and low-pass filters are used, this method estimates the cutoff frequency as the peak
                  frequency of the temporal power spectrum (TPS) of the training data.

            tps_block_size (int, optional): [Default=None] time-block size for calculating the TPS of the principal
                coefficients.

                - If set to None and the low-pass filter parameters need to be estimated, the function finds the block
                  size so that the TPS estimates are averaged over 100 blocks. This ensures that the signal-to-noise
                  ratio of each 1-D TPS estimate is at least 10.
        """
        self.__validate_training_data(training_data=training_data)

        # Warns about overwriting previously-fit models:
        if self._is_fitted:
            warnings.warn("This model instance has already been fitted. Calling fit() will overwrite existing "
                          "parameters.", UserWarning)

        print("\nReVAR Parameter Estimation\n"
              "==========================\n"
              "Number of time-steps in training data:  %d\n" % training_data.shape[0])
        start_time = time.time()

        # Run ReVAR pre-processing to normalize the data and compute a spatial PCA:
        principal_coefficients = self.pre_processing(training_data=training_data,
                                                     percent_variance=percent_variance)

        print("> Long-Range AR model fitting started...")
        # First the Long-Range AR model to the principal coefficients:
        super().fit(training_data=principal_coefficients,
                    cutoff_frequency=cutoff_frequency,
                    tps_block_size=tps_block_size,
                    print_statements=False)
        print("> Long-Range AR model fitting completed.\n")

        runtime_in_seconds = time.time() - start_time
        elapsed_time = str(timedelta(seconds=runtime_in_seconds))
        print("==========================")
        print(f"ReVAR Parameter Estimation completed in {elapsed_time} (hr:min:sec)\n")

        # Total number of parameters estimated from data:
        print(f"Total number of parameters: ", self.num_parameters)



[docs]
    def run(self, num_time_steps, initial_vectors=None, print_statements=False):
        """
        Runs the **ReVAR** data synthesis algorithm using estimated parameters from training data, as set by either the
        ``self.fit`` or ``self.load`` instance methods. This model generates synthetic data with the same statistics as
        training data.

        Args:
            num_time_steps (int): the number of time-steps of synthetic data to generate.
            initial_vectors (ndarray, optional): [Default=None] numpy 2-D array of shape (data vector dimensionality,
                number of time lags) containing the initial data vectors used by the data synthesis algorithm.

                - If set to None, generates initial vectors that have the same spatial distribution as the principal
                  coefficients of training data.

            print_statements (str, optional): [Default=False] whether to print out ``LongRangeAR`` statements.

        Returns:
            **synthetic_data** (*ndarray*) -- numpy 3-D array of shape (``num_time_steps``, image height, image width)
            containing the model's output synthetic data.

        Raises:
            RuntimeError: if the attribute ``self._if_fitted`` is not True (indicating that the model has not been fit).
        """
        if not self._is_fitted:
            raise RuntimeError("Model is not fitted. Please call 'fit()' or 'load()' before running generation.")

        print("\nReVAR Data Synthesis\n"
              "====================\n"
              f"Generating {num_time_steps} time-steps of synthetic data...\n")
        start_time = time.time()

        # Uses PCA to generate the initial vectors (which have the same spatial distribution as the principal
        # coefficients of training data):
        if initial_vectors is not None:
            assert (initial_vectors.shape == (self.vector_dimensionality, max(self.time_lags)))
        else:
            initial_white_noise = np.random.normal(size=(self.principal_components.shape[0], max(self.time_lags)))
            initial_vectors = np.multiply(np.sqrt(self.pc_variances)[:, None], initial_white_noise,
                                          out=initial_white_noise)

        # Generates synthetic data using the Long-Range AR model:
        synthetic_data = super().run(num_time_steps=num_time_steps,
                                     initial_vectors=initial_vectors,
                                     print_statements=False)

        # Projects the output of the AR model back onto the standard coordinate space:
        synthetic_data = np.dot(self.principal_components, synthetic_data)

        # Puts the data in the correct units:
        synthetic_data = (np.multiply(synthetic_data, self.standard_deviation_vector[:, np.newaxis]) +
                          self.mean_vector[:, np.newaxis])

        # Switch from vectors to images:
        synthetic_data = utils.vec_to_img(data_vec=synthetic_data.T, mask=self.data_mask)

        runtime_in_seconds = time.time() - start_time
        elapsed_time = str(timedelta(seconds=runtime_in_seconds))
        print("ReVAR Data Synthesis completed in {} (hr:min:sec)\n".format(elapsed_time))

        return synthetic_data



[docs]
    def save(self, save_file):
        """
        Saves all necessary information to re-construct the trained ``ReVAR`` model with a new instance.

        Args:
            save_file (str): directory of the file to which the data will be saved
        """
        assert (type(save_file) == str)
        print(f"Saving ReVAR model to {os.path.abspath(save_file)}...")
        valid_indices_of_prediction_weights_array = (self.prediction_window_indices != -1)
        prediction_weights = self.prediction_weights[valid_indices_of_prediction_weights_array]

        save_arrays = {'prediction_window_mask': self._base_prediction_window_mask,
                       'noise_modulation': self.noise_modulation, 'residuals_mean': self.residuals_mean,
                       'prediction_weights': prediction_weights, 'time_lags': self.time_lags,
                       'principal_components': self.principal_components, 'pc_variances': self.pc_variances,
                       'standard_deviation_vector': self.standard_deviation_vector, 'mean_vector': self.mean_vector,
                       'data_mask': self.data_mask}

        if self.prediction_subspace_dimension is not None:
            save_arrays['prediction_subspace_dimension'] = self.prediction_subspace_dimension

        if self.low_pass_filter_params is not None:
            save_arrays['low_pass_filter_params'] = self.low_pass_filter_params

        np.savez(save_file, **save_arrays)
        print(f"ReVAR model saved to {os.path.abspath(save_file)}.\n")



[docs]
    def load(self, load_file, print_statements=False):
        """
        Loads the ``ReVAR`` model information as saved by the ``self.save`` method and re-constructs the model.

        Args:
            load_file (str): directory of the file from which the data will be loaded
            print_statements (str, optional): [Default=False] whether to print out ``LongRangeAR`` loading statements.
        """
        assert (type(load_file) == str)
        print(f"Loading ReVAR model from {os.path.abspath(load_file)}...")
        super().load(load_file=load_file, print_statements=print_statements)

        # Load data containing instance variables:
        data = np.load(file=load_file, allow_pickle=True)

        # Ensure that the loaded model is compatible with the current model:
        self.data_mask = data['data_mask']

        # Saves PCA instance variables:
        self.principal_components = data['principal_components']
        self.pc_variances = data['pc_variances']
        self.standard_deviation_vector = data['standard_deviation_vector']
        self.mean_vector = data['mean_vector']

        print(f"Finished loading ReVAR model from {os.path.abspath(load_file)}.")

        # Total number of parameters estimated from data:
        print(f"Total number of parameters: ", self.num_parameters, '\n')



[docs]
    def pre_processing(self, training_data, percent_variance=None):
        """
        Pre-processing step of **ReVAR** parameter estimation: i) normalize the training data by its sample mean and
        standard deviation vectors and ii) compute a spatial PCA from the normalized training data.

        Args:
            training_data (ndarray): numpy 3-D array of shape (number of images, image height, image width) containing
                the data to fit.

            percent_variance (float, optional): [Default=None] percent variance of the subspace to use for linear time
                prediction in the ``LongRangeAR`` model.

                - If set to None, linear prediction is applied to all principal coefficients.

        Returns:
            **principal_coefficients** (*ndarray*) -- numpy 2-D array of shape (vector dimensionality, number of images)
            containing the principal coefficient vectors of the (normalized) training data.
        """
        assert (training_data.ndim == 3)

        print("> ReVAR pre-processing started...")
        # Data normalization: remove mean and standard deviation vectors
        data_matrix = utils.img_to_vec(image_data=training_data, mask=self.data_mask).T
        self.mean_vector = np.average(data_matrix, axis=1)
        data_mean_removed = data_matrix - self.mean_vector[:, np.newaxis]
        self.standard_deviation_vector = np.sqrt(np.sum(data_mean_removed ** 2, axis=1) / training_data.shape[0])

        # If there are (temporally) constant components, allow normalization:
        with np.errstate(divide='ignore', invalid='ignore'):
            data_normalized = data_mean_removed / self.standard_deviation_vector[:, np.newaxis]

        # Handle the case of zero variance:
        data_normalized = np.nan_to_num(data_normalized, nan=0.0, posinf=0.0, neginf=0.0)

        # Take a spatial PCA of the data:
        self.principal_components, self.pc_variances = pca.compute_pca(data=data_normalized)[1:]

        # Compute the principal coefficients:
        principal_coefficients = np.dot(self.principal_components.T, data_normalized)

        # Calculates the number of components to include in the subspace:
        if percent_variance is not None:
            num_coefficients = pca.find_top_principal_components(self.pc_variances, percent_variance)

            # Re-sets the prediction window indices of the linear time prediction model:
            self.create_model_structure(prediction_subspace_dimension=num_coefficients)

        print("> ReVAR pre-processing completed.\n")

        return principal_coefficients


    def __validate_training_data(self, training_data):
        """
        Ensures that the training data is compatible with the ``ReVAR`` class and the data mask of the model instance.

        Args:
            training_data (ndarray): numpy 3-D array of shape (number of images, image height, image width) containing
                the data to fit.

        Raises:
            ValueError: if the training data is not compatible with the class or with ``self.data_mask``.
        """
        if training_data.ndim != 3:
            raise ValueError(f"Training data must be 2-dimensional, got shape {training_data.shape}.")

        # Make sure the training data images have the same shape as self.data_mask:
        if (training_data.shape[1], training_data.shape[2]) != self.data_mask.shape:
            raise ValueError(f"Training data images must have the same shape as self.data_mask,"
                             f" got shape {(training_data.shape[1], training_data.shape[2])}.")

        if training_data.shape[0] == 0:
            raise ValueError(f"Training data vector must have at least one time sample.")

        # Ensure that the training data has no values of "nan" inside of self.data_mask:
        mask_compatibility = np.isnan(utils.img_to_vec(image_data=training_data, mask=self.data_mask))
        if mask_compatibility.any():
            num_incompatible_components = np.count_nonzero(mask_compatibility)
            raise ValueError(f"Training data is incompatible with self.data_mask, "
                             f"found {num_incompatible_components} incompatible values in data.")

    @property
    def num_parameters(self):
        """
        Calculates the number of parameters in the model. These parameters are estimated from training data and used
        to generate synthetic data.

        Returns:
            **total_num_params** (*int*) -- total number of parameters in the model.
        """
        num_pixels = self.data_mask.sum()
        total_num_params = self.num_prediction_weights + 2 * num_pixels * (num_pixels + 2) + self.num_low_pass_filters

        return total_num_params