import numpy as np
import aomodel.utils as utils
import aomodel.pca as pca
from aomodel.long_range_ar import LongRangeAR
import time
from datetime import timedelta
import os
import warnings
# Approved for public release; distribution is unlimited. Public Affairs release approval # AFRL-2026-1309.
[docs]
class ReVAR(LongRangeAR):
"""
Implements the **Re-whitened Vector AutoRegression (ReVAR)** algorithm to generate synthetic time series of images
with the same spatial and temporal statistics as an input data set. For a thorough description of this model, see
:ref:`Theory`.
Args:
data_mask (ndarray, optional): [Default=None] numpy 2-D boolean array of shape (image height, image width)
indicating which 2-D indices of each time-step correspond to valid pixel values.
- If set to None, then ``load_file`` must be provided. In this case, the class loads the mask from the file.
time_lags (Union[int, list, ndarray], optional): [Default=1] either an integer, list, or numpy 1-D array
indicating the time lags to use for the model.
- If ``time_lags`` is an integer, the function uses the previous time-steps up to this integer value.
- If ``time_lags`` is a list or numpy 1-D array, the model uses these time lags.
num_low_pass_filters (int, optional): [Default=0] the number of low-pass filters to use in the linear time
predictive model.
load_file (str, optional): [Default=None] directory to a file from which the model's instance variables can
be loaded.
prediction_window_structure (PredictionWindowStructure, optional): [Default=None] an instance of a sub-class
of ``PredictionWindowStructure`` implementing get_mask(). Passed as input to the constructor of
``LongRangeAR``.
- If set to None, the method builds a prediction window structure from `**kwargs`.
**kwargs (dict, optional): Keyword arguments passed to ``LongRangeAR``.
- **prediction_subspace_dimension** (*int*) -- number of (top) vector components to use for linear time
prediction.
- **low_pass_filter_params** (*ndarray*) -- numpy 1-D array of length ``num_low_pass_filters`` containing
the parameters of each low-pass filter.
"""
def __init__(self, data_mask=None, time_lags=1, num_low_pass_filters=0, load_file=None,
prediction_window_structure=None, **kwargs):
if data_mask is None:
assert (load_file is not None)
num_pixels = None
else:
assert (data_mask.dtype == bool)
assert (data_mask.ndim == 2)
num_pixels = data_mask.sum()
# Initializes instance variables:
self.data_mask = data_mask
self.principal_components = None
self.pc_variances = None
self.standard_deviation_vector = None
self.mean_vector = None
print("\nInitializing ReVAR model...")
super().__init__(vector_dimensionality=num_pixels, time_lags=time_lags,
num_low_pass_filters=num_low_pass_filters, load_file=load_file,
prediction_window_structure=prediction_window_structure, print_statements=False, **kwargs)
if load_file is None:
print("ReVAR model initialized. Use ReVAR.fit() to fit this model to data or ReVAR.load() to load a "
"pre-trained model.\n")
[docs]
def fit(self, training_data, percent_variance=None, cutoff_frequency=None, tps_block_size=None):
"""
Estimates the parameters of **ReVAR** from training data. Uses ``LongRangeAR.fit`` to compute the prediction
weights, low-pass filter parameters (if used), and noise modulation matrix.
Args:
training_data (ndarray): numpy 3-D array of shape (number of images, image height, image width) containing
the data to fit.
percent_variance (float, optional): [Default=None] percent variance of the subspace to use for linear time
prediction in the **Long-Range AR** model.
- If set to None, linear prediction is applied to all principal coefficients.
cutoff_frequency (float, optional): [Default=None] cutoff frequency (in units of cycles/sample) to use for
estimation of the low-pass filter parameters.
- If set to None and low-pass filters are used, this method estimates the cutoff frequency as the peak
frequency of the temporal power spectrum (TPS) of the training data.
tps_block_size (int, optional): [Default=None] time-block size for calculating the TPS of the principal
coefficients.
- If set to None and the low-pass filter parameters need to be estimated, the function finds the block
size so that the TPS estimates are averaged over 100 blocks. This ensures that the signal-to-noise
ratio of each 1-D TPS estimate is at least 10.
"""
self.__validate_training_data(training_data=training_data)
# Warns about overwriting previously-fit models:
if self._is_fitted:
warnings.warn("This model instance has already been fitted. Calling fit() will overwrite existing "
"parameters.", UserWarning)
print("\nReVAR Parameter Estimation\n"
"==========================\n"
"Number of time-steps in training data: %d\n" % training_data.shape[0])
start_time = time.time()
# Run ReVAR pre-processing to normalize the data and compute a spatial PCA:
principal_coefficients = self.pre_processing(training_data=training_data,
percent_variance=percent_variance)
print("> Long-Range AR model fitting started...")
# First the Long-Range AR model to the principal coefficients:
super().fit(training_data=principal_coefficients,
cutoff_frequency=cutoff_frequency,
tps_block_size=tps_block_size,
print_statements=False)
print("> Long-Range AR model fitting completed.\n")
runtime_in_seconds = time.time() - start_time
elapsed_time = str(timedelta(seconds=runtime_in_seconds))
print("==========================")
print(f"ReVAR Parameter Estimation completed in {elapsed_time} (hr:min:sec)\n")
# Total number of parameters estimated from data:
print(f"Total number of parameters: ", self.num_parameters)
[docs]
def run(self, num_time_steps, initial_vectors=None, print_statements=False):
"""
Runs the **ReVAR** data synthesis algorithm using estimated parameters from training data, as set by either the
``self.fit`` or ``self.load`` instance methods. This model generates synthetic data with the same statistics as
training data.
Args:
num_time_steps (int): the number of time-steps of synthetic data to generate.
initial_vectors (ndarray, optional): [Default=None] numpy 2-D array of shape (data vector dimensionality,
number of time lags) containing the initial data vectors used by the data synthesis algorithm.
- If set to None, generates initial vectors that have the same spatial distribution as the principal
coefficients of training data.
print_statements (str, optional): [Default=False] whether to print out ``LongRangeAR`` statements.
Returns:
**synthetic_data** (*ndarray*) -- numpy 3-D array of shape (``num_time_steps``, image height, image width)
containing the model's output synthetic data.
Raises:
RuntimeError: if the attribute ``self._if_fitted`` is not True (indicating that the model has not been fit).
"""
if not self._is_fitted:
raise RuntimeError("Model is not fitted. Please call 'fit()' or 'load()' before running generation.")
print("\nReVAR Data Synthesis\n"
"====================\n"
f"Generating {num_time_steps} time-steps of synthetic data...\n")
start_time = time.time()
# Uses PCA to generate the initial vectors (which have the same spatial distribution as the principal
# coefficients of training data):
if initial_vectors is not None:
assert (initial_vectors.shape == (self.vector_dimensionality, max(self.time_lags)))
else:
initial_white_noise = np.random.normal(size=(self.principal_components.shape[0], max(self.time_lags)))
initial_vectors = np.multiply(np.sqrt(self.pc_variances)[:, None], initial_white_noise,
out=initial_white_noise)
# Generates synthetic data using the Long-Range AR model:
synthetic_data = super().run(num_time_steps=num_time_steps,
initial_vectors=initial_vectors,
print_statements=False)
# Projects the output of the AR model back onto the standard coordinate space:
synthetic_data = np.dot(self.principal_components, synthetic_data)
# Puts the data in the correct units:
synthetic_data = (np.multiply(synthetic_data, self.standard_deviation_vector[:, np.newaxis]) +
self.mean_vector[:, np.newaxis])
# Switch from vectors to images:
synthetic_data = utils.vec_to_img(data_vec=synthetic_data.T, mask=self.data_mask)
runtime_in_seconds = time.time() - start_time
elapsed_time = str(timedelta(seconds=runtime_in_seconds))
print("ReVAR Data Synthesis completed in {} (hr:min:sec)\n".format(elapsed_time))
return synthetic_data
[docs]
def save(self, save_file):
"""
Saves all necessary information to re-construct the trained ``ReVAR`` model with a new instance.
Args:
save_file (str): directory of the file to which the data will be saved
"""
assert (type(save_file) == str)
print(f"Saving ReVAR model to {os.path.abspath(save_file)}...")
valid_indices_of_prediction_weights_array = (self.prediction_window_indices != -1)
prediction_weights = self.prediction_weights[valid_indices_of_prediction_weights_array]
save_arrays = {'prediction_window_mask': self._base_prediction_window_mask,
'noise_modulation': self.noise_modulation, 'residuals_mean': self.residuals_mean,
'prediction_weights': prediction_weights, 'time_lags': self.time_lags,
'principal_components': self.principal_components, 'pc_variances': self.pc_variances,
'standard_deviation_vector': self.standard_deviation_vector, 'mean_vector': self.mean_vector,
'data_mask': self.data_mask}
if self.prediction_subspace_dimension is not None:
save_arrays['prediction_subspace_dimension'] = self.prediction_subspace_dimension
if self.low_pass_filter_params is not None:
save_arrays['low_pass_filter_params'] = self.low_pass_filter_params
np.savez(save_file, **save_arrays)
print(f"ReVAR model saved to {os.path.abspath(save_file)}.\n")
[docs]
def load(self, load_file, print_statements=False):
"""
Loads the ``ReVAR`` model information as saved by the ``self.save`` method and re-constructs the model.
Args:
load_file (str): directory of the file from which the data will be loaded
print_statements (str, optional): [Default=False] whether to print out ``LongRangeAR`` loading statements.
"""
assert (type(load_file) == str)
print(f"Loading ReVAR model from {os.path.abspath(load_file)}...")
super().load(load_file=load_file, print_statements=print_statements)
# Load data containing instance variables:
data = np.load(file=load_file, allow_pickle=True)
# Ensure that the loaded model is compatible with the current model:
self.data_mask = data['data_mask']
# Saves PCA instance variables:
self.principal_components = data['principal_components']
self.pc_variances = data['pc_variances']
self.standard_deviation_vector = data['standard_deviation_vector']
self.mean_vector = data['mean_vector']
print(f"Finished loading ReVAR model from {os.path.abspath(load_file)}.")
# Total number of parameters estimated from data:
print(f"Total number of parameters: ", self.num_parameters, '\n')
[docs]
def pre_processing(self, training_data, percent_variance=None):
"""
Pre-processing step of **ReVAR** parameter estimation: i) normalize the training data by its sample mean and
standard deviation vectors and ii) compute a spatial PCA from the normalized training data.
Args:
training_data (ndarray): numpy 3-D array of shape (number of images, image height, image width) containing
the data to fit.
percent_variance (float, optional): [Default=None] percent variance of the subspace to use for linear time
prediction in the ``LongRangeAR`` model.
- If set to None, linear prediction is applied to all principal coefficients.
Returns:
**principal_coefficients** (*ndarray*) -- numpy 2-D array of shape (vector dimensionality, number of images)
containing the principal coefficient vectors of the (normalized) training data.
"""
assert (training_data.ndim == 3)
print("> ReVAR pre-processing started...")
# Data normalization: remove mean and standard deviation vectors
data_matrix = utils.img_to_vec(image_data=training_data, mask=self.data_mask).T
self.mean_vector = np.average(data_matrix, axis=1)
data_mean_removed = data_matrix - self.mean_vector[:, np.newaxis]
self.standard_deviation_vector = np.sqrt(np.sum(data_mean_removed ** 2, axis=1) / training_data.shape[0])
# If there are (temporally) constant components, allow normalization:
with np.errstate(divide='ignore', invalid='ignore'):
data_normalized = data_mean_removed / self.standard_deviation_vector[:, np.newaxis]
# Handle the case of zero variance:
data_normalized = np.nan_to_num(data_normalized, nan=0.0, posinf=0.0, neginf=0.0)
# Take a spatial PCA of the data:
self.principal_components, self.pc_variances = pca.compute_pca(data=data_normalized)[1:]
# Compute the principal coefficients:
principal_coefficients = np.dot(self.principal_components.T, data_normalized)
# Calculates the number of components to include in the subspace:
if percent_variance is not None:
num_coefficients = pca.find_top_principal_components(self.pc_variances, percent_variance)
# Re-sets the prediction window indices of the linear time prediction model:
self.create_model_structure(prediction_subspace_dimension=num_coefficients)
print("> ReVAR pre-processing completed.\n")
return principal_coefficients
def __validate_training_data(self, training_data):
"""
Ensures that the training data is compatible with the ``ReVAR`` class and the data mask of the model instance.
Args:
training_data (ndarray): numpy 3-D array of shape (number of images, image height, image width) containing
the data to fit.
Raises:
ValueError: if the training data is not compatible with the class or with ``self.data_mask``.
"""
if training_data.ndim != 3:
raise ValueError(f"Training data must be 2-dimensional, got shape {training_data.shape}.")
# Make sure the training data images have the same shape as self.data_mask:
if (training_data.shape[1], training_data.shape[2]) != self.data_mask.shape:
raise ValueError(f"Training data images must have the same shape as self.data_mask,"
f" got shape {(training_data.shape[1], training_data.shape[2])}.")
if training_data.shape[0] == 0:
raise ValueError(f"Training data vector must have at least one time sample.")
# Ensure that the training data has no values of "nan" inside of self.data_mask:
mask_compatibility = np.isnan(utils.img_to_vec(image_data=training_data, mask=self.data_mask))
if mask_compatibility.any():
num_incompatible_components = np.count_nonzero(mask_compatibility)
raise ValueError(f"Training data is incompatible with self.data_mask, "
f"found {num_incompatible_components} incompatible values in data.")
@property
def num_parameters(self):
"""
Calculates the number of parameters in the model. These parameters are estimated from training data and used
to generate synthetic data.
Returns:
**total_num_params** (*int*) -- total number of parameters in the model.
"""
num_pixels = self.data_mask.sum()
total_num_params = self.num_prediction_weights + 2 * num_pixels * (num_pixels + 2) + self.num_low_pass_filters
return total_num_params