2022-10-18 18:41:20 +00:00
|
|
|
# Copyright (C) 2022 Richard Stiskalek
|
|
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
|
|
# under the terms of the GNU General Public License as published by the
|
|
|
|
# Free Software Foundation; either version 3 of the License, or (at your
|
|
|
|
# option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful, but
|
|
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
|
|
|
# Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
2022-10-20 22:28:44 +00:00
|
|
|
"""
|
|
|
|
Utility functions for manipulating structured arrays.
|
|
|
|
"""
|
2022-10-18 18:41:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
import numpy
|
|
|
|
|
2022-10-20 22:28:44 +00:00
|
|
|
|
2022-10-18 18:41:20 +00:00
|
|
|
def cols_to_structured(N, cols):
    """
    Allocate a structured array from `cols`.

    Parameters
    ----------
    N : int
        Structured array size.
    cols: list of tuples
        Column names and dtypes. Each tuple must be written as
        `(name, dtype)`.

    Returns
    -------
    out : structured array
        Structured array of size `N` allocated with `numpy.full` using
        `numpy.nan` as the fill value.

    Raises
    ------
    TypeError
        If `cols` is not a list or if any of its elements is not a tuple.
    """
    # Both conditions must hold. The container is checked first so that the
    # element check only ever iterates over a list.
    is_list_of_tuples = (isinstance(cols, list)
                         and all(isinstance(c, tuple) for c in cols))
    if not is_list_of_tuples:
        raise TypeError("`cols` must be a list of tuples.")

    dtype = {"names": [col[0] for col in cols],
             "formats": [col[1] for col in cols]}
    return numpy.full(N, numpy.nan, dtype=dtype)
|
|
|
|
|
|
|
|
|
|
|
|
def add_columns(arr, X, cols):
    """
    Add new columns to a record array `arr`. Creates a new array.

    Parameters
    ----------
    arr : record array
        The record array to add columns to.
    X : (list of) 1-dimensional array(s) or 2-dimensional array
        Columns to be added. A 2-dimensional array must have shape
        `(arr.size, len(cols))`.
    cols : str or list of str
        Column names to be added.

    Returns
    -------
    out : record array
        The new record array with added values.

    Raises
    ------
    ValueError
        If the number of columns of `X` does not match `cols` or if the
        number of rows of `X` does not match the size of `arr`.
    """
    # Make sure cols is a list of str and X a 2D array
    cols = [cols] if isinstance(cols, str) else cols
    if isinstance(X, numpy.ndarray) and X.ndim == 1:
        X = X.reshape(-1, 1)
    if isinstance(X, list) and all(numpy.ndim(x) == 1 for x in X):
        # Each 1-dimensional entry becomes one column of the 2D array.
        X = numpy.column_stack(X)

    if len(cols) != X.shape[1]:
        raise ValueError("Number of columns of `X` does not match `cols`.")
    if arr.size != X.shape[0]:
        raise ValueError("Number of rows of `X` does not match size of `arr`.")

    # Get the new data types. The old fields are collected by name rather
    # than from `arr.dtype.descr`, which may contain unnamed padding entries.
    # All new columns share the dtype of the 2D array `X`.
    dtype = [(name, arr.dtype[name]) for name in arr.dtype.names]
    dtype.extend((col, X.dtype) for col in cols)

    # Fill in the old array
    out = numpy.full(arr.size, numpy.nan, dtype=dtype)
    for col in arr.dtype.names:
        out[col] = arr[col]
    # Fill in the new columns
    for i, col in enumerate(cols):
        out[col] = X[:, i]

    return out
|
|
|
|
|
2022-10-20 22:28:44 +00:00
|
|
|
|
2022-10-18 18:41:20 +00:00
|
|
|
def rm_columns(arr, cols):
    """
    Remove columns `cols` from a record array `arr`. Creates a new array.

    Parameters
    ----------
    arr : record array
        The record array to remove columns from.
    cols : str or list of str
        Column names to be removed.

    Returns
    -------
    out : record array
        Record array with removed columns.

    Raises
    ------
    ValueError
        If any of `cols` is not a column of `arr`.
    """
    # Check columns we wish to delete are in the array
    cols = [cols] if isinstance(cols, str) else cols
    for col in cols:
        if col not in arr.dtype.names:
            raise ValueError("Column `{}` not in `arr`.".format(col))

    # Get a new dtype without the cols to be deleted. Fields are looked up by
    # name because `arr.dtype.descr` may contain unnamed padding entries and
    # then no longer lines up with `arr.dtype.names`.
    new_dtype = [(name, arr.dtype[name]) for name in arr.dtype.names
                 if name not in cols]

    # Allocate a new array and fill it in.
    out = numpy.full(arr.size, numpy.nan, new_dtype)
    for name in out.dtype.names:
        out[name] = arr[name]

    return out
|
|
|
|
|
|
|
|
|
|
|
|
def list_to_ndarray(arrs, cols):
    """
    Stack selected columns of several CSiBORG catalogues (structured arrays)
    into a single 3-dimensional array.

    Parameters
    ----------
    arrs : list of structured arrays
        List of CSiBORG catalogues.
    cols : str or list of str
        Columns to be extracted from the CSiBORG catalogues.

    Returns
    -------
    out : 3-dimensional array
        Catalogue array of shape `(n_realisations, n_samples, n_cols)`, where
        `n_samples` is the maximum number of samples over the CSiBORG
        catalogues. Entries beyond the size of a given catalogue are left
        as `numpy.nan`.
    """
    if not isinstance(arrs, list):
        raise TypeError("`arrs` must be a list of structured arrays.")
    if isinstance(cols, str):
        cols = [cols]

    sizes = [cat.size for cat in arrs]
    # Preallocate with NaNs so that shorter catalogues end up NaN-padded.
    out = numpy.full((len(arrs), max(sizes), len(cols)), numpy.nan)
    for i, (cat, size) in enumerate(zip(arrs, sizes)):
        for j, col in enumerate(cols):
            out[i, :size, j] = cat[col]
    return out
|
2022-10-20 22:28:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
def array_to_structured(arr, cols):
    """
    Create a structured array from a 2-dimensional array.

    Parameters
    ----------
    arr : 2-dimensional array
        Original array of shape `(n_samples, n_cols)`.
    cols : str or list of str
        Columns of the structured array. A single string is treated as a
        one-element list.

    Returns
    -------
    out : structured array
        The output structured array. Every field has the dtype of `arr`.

    Raises
    ------
    TypeError
        If `arr` is not 2-dimensional or if its number of columns does not
        match the number of `cols`.
    """
    cols = [cols] if isinstance(cols, str) else cols
    # Either failure is an error. The dimensionality is tested first so that
    # `arr.shape[1]` is only accessed on a 2-dimensional array.
    if arr.ndim != 2 or arr.shape[1] != len(cols):
        raise TypeError("`arr` must be a 2-dimensional array of "
                        "shape `(n_samples, n_cols)`.")

    dtype = {"names": cols, "formats": [arr.dtype] * len(cols)}
    out = numpy.full(arr.shape[0], numpy.nan, dtype=dtype)
    for i, col in enumerate(cols):
        out[col] = arr[:, i]

    return out
|
|
|
|
|
|
|
|
|
|
|
|
def flip_cols(arr, col1, col2):
    """
    Swap the values stored in columns `col1` and `col2` of `arr`. The swap is
    done in place on `arr`, nothing is returned.

    Parameters
    ----------
    arr : structured array
        The array whose columns are to be swapped.
    col1 : str
        The first column name.
    col2 : str
        The second column name.

    Returns
    -------
    nothing
    """
    # Copy the first column before it is overwritten.
    saved = numpy.copy(arr[col1])
    arr[col1], arr[col2] = arr[col2], saved
|