Module pyl4c.apps.nature
Tools for working with L4SM Nature Run data.
TODO: Refactor to use numpy.ravel_multi_index
?
Classes
class NatureRunNetCDF4 (file_path: str)
-
Expand source code
class NatureRunNetCDF4(object):
    '''
    Represents an L4SM Nature Run netCDF4 dataset. Contains methods for
    converting from the compact, 1D "tile-space" to a 2D EASE-Grid 2.0.

    Would have liked to subclass netCDF4.Dataset directly, but it is part of
    a Cython library so I can't interrogate its class structure.

    Parameters
    ----------
    file_path : str
        The NetCDF4 file path
    '''
    def __init__(self, file_path: str):
        self._dataset_file_path = file_path
        self._grid = 'M09'
        try:
            self.dataset = netCDF4.Dataset(file_path)
        except OSError as err:
            # Chain the underlying error so that unrelated failures (e.g., a
            # missing file) are still visible behind the import-order hint
            raise OSError(
                'Due to a bug in netCDF4, you MUST import netCDF4 BEFORE '
                'h5py; import this module (calibration) first if using in '
                'a script') from err

    @property
    def variables(self):
        'The variables mapping of the underlying netCDF4 dataset'
        return self.dataset.variables

    @cached_property
    def _col_idx(self):
        # Contents of the "IG" variable, read once and cached; used as the
        # EASE-Grid 2.0 column index of each tile
        return np.array(self.dataset.variables['IG'][:])

    @cached_property
    def _row_idx(self):
        # Contents of the "JG" variable, read once and cached; used as the
        # EASE-Grid 2.0 row index of each tile
        return np.array(self.dataset.variables['JG'][:])

    @classmethod
    def index_bulk(cls, nc, variable, tile_idx, reducer = 'mean'):
        '''
        A more efficient way of indexing, assuming that tile indices are
        already known. This avoids the expensive mapping of the same set of
        EASE-Grid 2.0 coordinates to tile space, and can be applied to
        multiple tile-space netCDF4 files.

        Parameters
        ----------
        nc : netCDF4.Dataset
            The netCDF4 file to sample from; any object with a `variables`
            mapping works
        variable : str
            Name of the variable
        tile_idx : list or tuple
            Tile-space indices
        reducer : str
            Name of the array method (e.g., "mean") used to collapse the
            first axis of a 2D variable; ignored otherwise

        Returns
        -------
        numpy.ndarray
            One value per entry of `tile_idx`; empty if `tile_idx` is empty
        '''
        target_variable = nc.variables[variable]
        offsets = np.asarray(tile_idx, dtype = np.int64)
        if offsets.size == 0:
            return np.array([])
        # Minimize the extent of the array we need to read from the file;
        # e.g., tile space indices are [0, 1, ... N] but we may only need
        # to index between A and B, so instead of reading from 0 all the way
        # to N, just read between A (tmin) and B (tmax).
        tmin, tmax = (int(offsets.min()), int(offsets.max()) + 1)
        offsets = offsets - tmin
        # Check if we're working with a 3-hourly L4SM variable
        if target_variable.ndim == 2:
            subarray = target_variable[:,tmin:tmax]
            return np.array(getattr(subarray[:,offsets], reducer)(axis = 0))
        # Otherwise the tile axis is the first axis, so slice along it
        subarray = target_variable[tmin:tmax]
        return np.array(subarray[offsets])

    def as_tile(self, array):
        '''
        Converts a 2D EASE-Grid 2.0 array to tile format.

        Parameters
        ----------
        array : numpy.ndarray
            2D array that can be indexed as `array[row, col]`

        Returns
        -------
        numpy.ndarray
            Floating-point array with one value per tile
        '''
        new_array = np.full(self._row_idx.shape, np.nan)
        # One fancy-indexing read replaces the per-tile Python loop
        new_array[:] = array[self._row_idx, self._col_idx]
        return new_array

    def tile_to_ease2(self, tile_idx):
        'Maps 1D tile coordinates to 2D EASE-Grid 2.0 coordinates'
        return (self._row_idx[tile_idx], self._col_idx[tile_idx])

    def ease2_to_tile(self, row, col):
        '''
        Maps 2D EASE-Grid 2.0 coordinates to 1D tile coordinates.

        Raises
        ------
        ValueError
            If no tile has the requested row-column pair
        '''
        match = np.logical_and(self._row_idx == row, self._col_idx == col)
        if not np.any(match):
            raise ValueError("No matching row-column index pair was found; requested cell might be outside of the model domain")
        return int(np.flatnonzero(match)[0])

    def index(self, variable, indices):
        '''
        Returns values at the given indices, based on row-column pairs from
        a 2D EASE-Grid 2.0.

        NOTE: It is MUCH faster (by an order of magnitude) to use the class
        method index_bulk() if you need to extract the same indices from
        multiple tile-space netCDF4 files. The recommended pattern is based
        on this function's signature, where the `tile_idx` are computed
        first and then passed to index_bulk().

        Parameters
        ----------
        variable : str
            Name of the netCDF4 variable to sample
        indices : tuple or list
            Sequence of (row, column) 2-tuples; a pair containing None is
            not looked up and yields NaN in the output

        Returns
        -------
        numpy.ndarray

        Raises
        ------
        TypeError
            If `indices` is not a sequence
        ValueError
            If a (row, column) pair does not match any tile
        '''
        if not hasattr(indices, 'index'):
            raise TypeError('Indices should be a sequence of (row, column) pairs')
        pairs = list(indices)
        # Positions of the pairs that can actually be looked up
        known = [
            i for i, (row, col) in enumerate(pairs)
            if row is not None and col is not None
        ]
        if not known:
            return np.full(len(pairs), np.nan)
        # This is what takes the longest
        tile_idx = [self.ease2_to_tile(*pairs[i]) for i in known]
        values = type(self).index_bulk(self, variable, tile_idx)
        if len(known) == len(pairs):
            return values
        # Scatter the sampled values around the NaN placeholders
        result = np.full(len(pairs), np.nan)
        result[known] = values
        return result

    def inflated(self, variable, dtype = np.float32, nodata = -9999):
        '''
        Returns a "tile space" variable (1D) as a 2D NumPy array,
        calculating a daily average as needed.

        Parameters
        ----------
        variable : str
            Name of the netCDF4 variable to inflate
        dtype : type
            The NumPy data type to use for the resulting array
        nodata : int or float
            The NoData value to use in the output array

        Returns
        -------
        numpy.ndarray
            Array with the grid shape from EASE2_GRID_PARAMS; cells without
            a tile hold `nodata`. A variable whose layout is not handled
            below yields an array filled entirely with `nodata`.
        '''
        target_variable = self.dataset.variables[variable]
        shp = EASE2_GRID_PARAMS[self._grid]['shape']
        result = np.full(shp, nodata, dtype = dtype)
        if target_variable.ndim == 2 and target_variable.shape[0] == 8:
            # 3-hourly L4SM variable: calculate daily mean (collapse first
            # axis)
            values = np.array(target_variable[:]).mean(axis = 0)
        elif target_variable.ndim == 2 and target_variable.shape[0] == 1:
            # Daily mean ("1200z") file with a trivial first axis
            values = np.array(target_variable[:]).ravel()
        elif target_variable.ndim == 1:
            # Already a plain tile-space vector
            values = np.array(target_variable[:])
        else:
            return result
        # Scatter every tile into its grid cell in one assignment
        n = values.shape[0]
        result[self._row_idx[:n], self._col_idx[:n]] = values
        return result
Represents an L4SM Nature Run netCDF4 dataset. Contains methods for converting from the compact, 1D "tile-space" to a 2D EASE-Grid 2.0.
Would have liked to subclass netCDF4.Dataset directly, but it is part of a Cython library so I can't interrogate its class structure.
Parameters
file_path
:str
- The NetCDF4 file path
Static methods
def index_bulk(nc, variable, tile_idx, reducer='mean')
-
A more efficient way of indexing, assuming that tile indices are already known. This avoids the expensive mapping of the same set of EASE-Grid 2.0 coordinates to tile space, and can be applied to multiple tile-space netCDF4 files.
Parameters
nc
:netCDF4.Dataset
- The netCDF4 file to sample from
variable
:str
- Name of the variable
tile_idx
:list
or tuple
- Tile-space indices
Returns
numpy.ndarray
Instance variables
prop variables
-
Expand source code
@property
def variables(self):
    'The variables mapping exposed by the wrapped netCDF4 dataset'
    wrapped = self.dataset
    return wrapped.variables
Methods
def as_tile(self, array)
-
Expand source code
def as_tile(self, array):
    '''
    Converts a 2D EASE-Grid 2.0 array to tile format.

    Parameters
    ----------
    array : numpy.ndarray
        2D array that can be indexed as `array[row, col]`

    Returns
    -------
    numpy.ndarray
        Floating-point array with one value per tile
    '''
    new_array = np.full(self._row_idx.shape, np.nan)
    # One fancy-indexing read replaces the per-tile Python loop
    new_array[:] = array[self._row_idx, self._col_idx]
    return new_array
Converts a 2D EASE-Grid 2.0 array to tile format
def ease2_to_tile(self, row, col)
-
Expand source code
def ease2_to_tile(self, row, col):
    '''
    Maps 2D EASE-Grid 2.0 coordinates to 1D tile coordinates.

    Returns the position of the first tile whose row and column indices
    both equal the requested pair; raises ValueError if there is none.
    '''
    same_row = self._row_idx == row
    same_col = self._col_idx == col
    hits = np.flatnonzero(np.logical_and(same_row, same_col))
    if hits.size == 0:
        raise ValueError("No matching row-column index pair was found; requested cell might be outside of the model domain")
    return int(hits[0])
Maps 2D EASE-Grid 2.0 coordinates to 1D tile coordinates
def index(self, variable, indices)
-
Expand source code
def index(self, variable, indices):
    '''
    Returns values at the given indices, based on row-column pairs from
    a 2D EASE-Grid 2.0.

    NOTE: It is MUCH faster (by an order of magnitude) to use the class
    method index_bulk() if you need to extract the same indices from
    multiple tile-space netCDF4 files. The recommended pattern is based
    on this function's signature, where the `tile_idx` are computed
    first and then passed to index_bulk().

    Parameters
    ----------
    variable : str
        Name of the netCDF4 variable to sample
    indices : tuple or list
        Sequence of (row, column) 2-tuples; a pair containing None is
        not looked up and yields NaN in the output

    Returns
    -------
    numpy.ndarray

    Raises
    ------
    TypeError
        If `indices` is not a sequence
    '''
    if not hasattr(indices, 'index'):
        raise TypeError('Indices should be a sequence of (row, column) pairs')
    pairs = list(indices)
    # Positions of the pairs that can actually be looked up
    known = [
        i for i, (row, col) in enumerate(pairs)
        if row is not None and col is not None
    ]
    if not known:
        return np.full(len(pairs), np.nan)
    # This is what takes the longest
    tile_idx = [self.ease2_to_tile(*pairs[i]) for i in known]
    # Only real tile indices are handed to index_bulk(); it cannot take
    # the minimum or maximum of a list that contains None
    values = type(self).index_bulk(self, variable, tile_idx)
    if len(known) == len(pairs):
        return values
    # Scatter the sampled values around the NaN placeholders
    result = np.full(len(pairs), np.nan)
    result[known] = values
    return result
Returns values at the given indices, based on row-column pairs from a 2D EASE-Grid 2.0.
NOTE: It is MUCH faster (by an order of magnitude) to use the class method index_bulk() if you need to extract the same indices from multiple tile-space netCDF4 files. The recommended pattern is based on this function's signature, where the
tile_idx
are computed first and then passed to index_bulk().
Parameters
variable
:str
- Name of the netCDF4 variable to sample
indices
:tuple
or list
- Sequence of (row, column) 2-tuples
Returns
numpy.ndarray
def inflated(self, variable, dtype=numpy.float32, nodata=-9999)
-
Expand source code
def inflated(self, variable, dtype = np.float32, nodata = -9999):
    '''
    Returns a "tile space" variable (1D) as a 2D NumPy array,
    calculating a daily average as needed.

    Parameters
    ----------
    variable : str
        Name of the netCDF4 variable to inflate
    dtype : type
        The NumPy data type to use for the resulting array
    nodata : int or float
        The NoData value to use in the output array

    Returns
    -------
    numpy.ndarray
        Array with the grid shape from EASE2_GRID_PARAMS; cells without
        a tile hold `nodata`. A variable whose layout is not handled
        below yields an array filled entirely with `nodata`.
    '''
    target_variable = self.dataset.variables[variable]
    shp = EASE2_GRID_PARAMS[self._grid]['shape']
    result = np.full(shp, nodata, dtype = dtype)
    if target_variable.ndim == 2 and target_variable.shape[0] == 8:
        # 3-hourly L4SM variable: calculate daily mean (collapse first
        # axis)
        values = np.array(target_variable[:]).mean(axis = 0)
    elif target_variable.ndim == 2 and target_variable.shape[0] == 1:
        # Daily mean ("1200z") file with a trivial first axis
        values = np.array(target_variable[:]).ravel()
    elif target_variable.ndim == 1:
        # Already a plain tile-space vector
        values = np.array(target_variable[:])
    else:
        return result
    # Scatter every tile into its grid cell in one assignment instead of
    # looping over tiles in Python
    n = values.shape[0]
    result[self._row_idx[:n], self._col_idx[:n]] = values
    return result
Returns a "tile space" variable (1D) as a 2D NumPy array, calculating a daily average as needed.
Parameters
variable
:str
- Name of the netCDF4 variable to inflate
dtype
:type
- The NumPy data type to use for the resulting array
nodata
:int
or float
- The NoData value to use in the output array
Returns
numpy.ndarray
def tile_to_ease2(self, tile_idx)
-
Expand source code
def tile_to_ease2(self, tile_idx):
    '''
    Maps 1D tile coordinates to 2D EASE-Grid 2.0 coordinates, returned
    as a (row, column) tuple.
    '''
    grid_row = self._row_idx[tile_idx]
    grid_col = self._col_idx[tile_idx]
    return (grid_row, grid_col)
Maps 1D tile coordinates to 2D EASE-Grid 2.0 coordinates