Source code for pyValEIA.utils.clean

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Functions for cleaning data products."""

import numpy as np

from pyValEIA.utils import filters


def _nan_percent(values):
    """Return the percentage of NaN entries in `values`.

    An empty selection holds no usable data, so it is reported as 100
    percent NaN instead of the NaN (plus RuntimeWarning) that taking the
    mean of an empty array would produce.
    """
    if np.size(values) == 0:
        return 100.0
    return np.isnan(values).mean() * 100


def mad_tec_clean(mad_tec_meas, mad_std_meas, mad_mlat, mlat_val, max_nan=20,
                  min_mlat=20):
    """Clean Madrigal TEC data.

    Parameters
    ----------
    mad_tec_meas : array-like
        Averaged TEC over longitude and time
    mad_std_meas : array-like
        Standard deviation of `mad_tec_meas`
    mad_mlat : array-like
        Magnetic latitude of `mad_tec_meas`
    mlat_val : int
        Magnetic latitude cutoff; only samples with
        ``abs(mad_mlat) < mlat_val`` are kept
    max_nan : float or int
        Maximum acceptable percent nan values in a pass (default=20)
    min_mlat : float or int
        Lower limit, in degrees, used when shrinking the magnetic latitude
        cutoff: the cutoff is reduced only while it is still greater than
        or equal to this value (default=20)

    Returns
    -------
    mad_tec_lat : array-like
        TEC values inside the final latitude cutoff, with rejected samples
        set to NaN
    mad_std_lat : array-like
        Standard deviations matching `mad_tec_lat`, set to NaN at the same
        locations
    nan_perc : float
        Percent of NaN values in `mad_tec_lat`; 100 if the selection is
        empty
    mlat_try : int
        Magnetic latitude cutoff that produced the returned data

    Notes
    -----
    The latitude limit of the search used to be compared against `max_nan`,
    mixing a percentage with degrees. The two only agree at the default
    value of 20, so the limit is now the separate `min_mlat` keyword.

    """

    def select(cutoff):
        # Boolean-mask indexing hands back new arrays for NumPy inputs, so
        # the NaN assignments below do not touch the caller's data.
        keep = abs(mad_mlat) < cutoff
        return mad_tec_meas[keep], mad_std_meas[keep]

    # Filter by magnetic latitude (start with the given mlat_val)
    mad_tec_lat, mad_std_lat = select(mlat_val)

    # If all finite data is below 5, then remove completely
    if np.all(mad_tec_lat[np.isfinite(mad_tec_lat)] < 5):
        mad_tec_lat[:] = np.nan
        mad_std_lat[:] = np.nan

    nan_perc = _nan_percent(mad_tec_lat)
    if nan_perc != 100:
        # Remove outlier TEC values
        out_tec = filters.detect_outliers(mad_tec_lat)
        mad_tec_lat[out_tec] = np.nan
        mad_std_lat[out_tec] = np.nan
        nan_perc = _nan_percent(mad_tec_lat)

    # While too many values are NaN (but fewer than 80 percent), narrow the
    # latitude window one degree at a time and clean the new selection.
    # NOTE(review): `>=` lets the cutoff finish at `min_mlat - 1`; kept so
    # results at the default settings are unchanged -- confirm whether `>`
    # was intended.
    mlat_try = mlat_val
    while max_nan < nan_perc < 80 and mlat_try >= min_mlat:
        mlat_try = mlat_try - 1
        mad_tec_lat, mad_std_lat = select(mlat_try)

        # Remove outlier TEC values; nothing to inspect in an empty window
        if np.size(mad_tec_lat) > 0:
            out_tec = filters.detect_outliers(mad_tec_lat)
            mad_tec_lat[out_tec] = np.nan
            mad_std_lat[out_tec] = np.nan

        nan_perc = _nan_percent(mad_tec_lat)

    # If all remaining finite data is below 5, then remove completely
    if np.all(mad_tec_lat[np.isfinite(mad_tec_lat)] < 5):
        mad_tec_lat[:] = np.nan
        mad_std_lat[:] = np.nan

    # Calculate nan percent one final time
    nan_perc = _nan_percent(mad_tec_lat)

    return mad_tec_lat, mad_std_lat, nan_perc, mlat_try