# csiborgtools/scripts_plots/plt_utils.py

# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import numpy
from scipy.stats import binned_statistic
from scipy.special import erf

# Module-level plotting defaults. How they are consumed is not visible in this
# file; the notes below are assumptions to be confirmed against the callers.
# Presumably the resolution (dots per inch) used when saving figures.
dpi = 600
# Relative output directory; presumably where the plot files are written.
fout = "../plots/"
# Presumably a list of matplotlib style names ("science" looks like the
# SciencePlots style) -- TODO confirm.
mplstyle = ["science"]


def latex_float(*floats, n=2):
    """
    Format one or more numbers as LaTeX-ready strings. Numbers that the `g`
    format renders in exponent notation are rewritten as
    `base \\times 10^{exponent}`, all others are returned as formatted.
    Adapted from [1].

    Parameters
    ----------
    *floats : float
        The number(s) to be converted, passed as positional arguments.
    n : int, optional
        The number of significant figures to be used in the LaTeX string.

    Returns
    -------
    latex_floats : str or list of str
        A single string if exactly one number was given, otherwise a list
        with one string per number.

    References
    ----------
    [1] https://stackoverflow.com/questions/13490292/format-number-using-latex-notation-in-python  # noqa
    """
    template = r"{0} \times 10^{{{1}}}"
    converted = []

    for value in floats:
        text = "{0:.{1}g}".format(value, n)

        # No exponent part: the plain representation is already fine.
        if "e" not in text:
            converted.append(text)
            continue

        # `int` strips the sign padding and leading zeros, e.g. "+05" -> 5.
        mantissa, power = text.split("e")
        converted.append(template.format(mantissa, int(power)))

    return converted[0] if len(floats) == 1 else converted


def nan_weighted_average(arr, weights=None, axis=None):
    """
    Weighted average of `arr` that ignores its NaN entries.

    Parameters
    ----------
    arr : array
        Values to be averaged; may contain NaNs.
    weights : array, optional
        Weights of the values. By default all weights are one. Only NaNs in
        `arr` are masked, NaNs in `weights` are not treated specially.
    axis : int, optional
        Axis along which to average. By default all entries are used.

    Returns
    -------
    average : float or array
        The weighted average. The result is the weighted sum divided by the
        sum of the weights, with no guard against a zero sum of weights.
    """
    if weights is None:
        weights = numpy.ones_like(arr)

    is_nan = numpy.isnan(arr)

    # Zero both the value and its weight wherever `arr` is NaN, so that those
    # entries contribute to neither the numerator nor the denominator.
    values = numpy.where(is_nan, 0, arr)
    masked_weights = numpy.where(is_nan, 0, weights)

    numerator = numpy.sum(values * masked_weights, axis=axis)
    denominator = numpy.sum(masked_weights, axis=axis)

    return numerator / denominator


def nan_weighted_std(arr, weights=None, axis=None, ddof=0):
    """
    Weighted standard deviation of `arr` that ignores its NaN entries.

    Parameters
    ----------
    arr : array
        Values whose spread is to be computed; may contain NaNs.
    weights : array, optional
        Weights of the values. By default all weights are one. Only NaNs in
        `arr` are masked, NaNs in `weights` are not treated specially.
    axis : int, optional
        Axis along which to compute the standard deviation. By default all
        entries are used.
    ddof : float, optional
        Value subtracted from the sum of weights in the denominator of the
        variance. By default 0.

    Returns
    -------
    std : float or array
        The weighted standard deviation.
    """
    if weights is None:
        weights = numpy.ones_like(arr)

    valid_entries = ~numpy.isnan(arr)

    # Zero both the value and its weight wherever `arr` is NaN, so that those
    # entries drop out of every sum below.
    arr = numpy.where(valid_entries, arr, 0)
    weights = numpy.where(valid_entries, weights, 0)

    sum_weights = numpy.sum(weights, axis=axis)

    # Keep the reduced axes so that the mean broadcasts against `arr` for any
    # `axis`, including the default `axis=None`, for which
    # `numpy.expand_dims` raises a TypeError.
    weighted_mean = (numpy.sum(arr * weights, axis=axis, keepdims=True)
                     / numpy.sum(weights, axis=axis, keepdims=True))

    variance = numpy.sum(weights * (arr - weighted_mean)**2, axis=axis)
    variance = variance / (sum_weights - ddof)

    return numpy.sqrt(variance)


def compute_error_bars(x, y, xbins, sigma):
    """
    Compute the binned median of `y` and asymmetric error bars around it, in
    bins of `x`.

    Parameters
    ----------
    x : 1-dimensional array
        Values used to assign the samples to bins.
    y : 1-dimensional array
        Values summarised within each bin.
    xbins : 1-dimensional array
        Bin edges passed to `numpy.digitize`. Samples falling outside of the
        edges are ignored.
    sigma : float
        Width of the error bars in units of the Gaussian standard deviation.
        It is converted to a lower and an upper percentile of `y`, e.g.
        `sigma=1` corresponds to approximately the 16th and 84th percentile.

    Returns
    -------
    y_medians : 1-dimensional array of shape `(len(xbins) - 1, )`
        Median of `y` in each bin; NaN for empty bins.
    yerr : tuple of two 1-dimensional arrays
        Distance from the median to the lower and to the upper percentile,
        respectively; NaN for empty bins.
    """
    x = numpy.asarray(x)
    y = numpy.asarray(y)

    bin_indices = numpy.digitize(x, xbins)
    nbins = max(len(xbins) - 1, 0)

    lower_pct = 100 * 0.5 * (1 - erf(sigma / numpy.sqrt(2)))
    upper_pct = 100 - lower_pct

    y_medians = numpy.full(nbins, numpy.nan)
    y_lower = numpy.full(nbins, numpy.nan)
    y_upper = numpy.full(nbins, numpy.nan)

    for i in range(nbins):
        # `numpy.digitize` labels the samples of the i-th bin with `i + 1`.
        y_bin = y[bin_indices == i + 1]

        # Leave empty bins as NaN instead of taking statistics of an empty
        # array, which would emit a RuntimeWarning.
        if y_bin.size == 0:
            continue

        y_medians[i] = numpy.median(y_bin)
        y_lower[i] = numpy.percentile(y_bin, lower_pct)
        y_upper[i] = numpy.percentile(y_bin, upper_pct)

    yerr = (y_medians - y_lower, y_upper - y_medians)

    return y_medians, yerr


def normalize_hexbin(hb):
    """
    Rescale the values of `hb` in place so that they sum up to one, and set
    its colour limits to the range of the rescaled values.

    Parameters
    ----------
    hb : object
        Object providing `get_array`, `set_array` and `set_clim`; presumably
        the collection returned by matplotlib's `hexbin` (TODO confirm).

    Returns
    -------
    None
    """
    counts = hb.get_array()
    fractions = counts / counts.sum()

    hb.set_array(fractions)

    # Stretch the colour scale to exactly span the rescaled values.
    lowest, highest = fractions.min(), fractions.max()
    hb.set_clim(lowest, highest)
Quijote kNN adding (#62) * Fix small bug * Add fiducial observers * Rename 1D knn * Add new bounds system * rm whitespace * Add boudns * Add simname to paths * Add fiducial obserevrs * apply bounds only if not none * Add TODO * add simnames * update script * Fix distance bug * update yaml * Update file reading * Update gitignore * Add plots * add check if empty list * add func to obtaining cross * Update nb * Remove blank lines * update ignroes * loop over a few ics * update gitignore * add comments 2023-05-16 00:30:10 +02:00			`# Copyright (C) 2023 Richard Stiskalek`
			`# This program is free software; you can redistribute it and/or modify it`
			`# under the terms of the GNU General Public License as published by the`
			`# Free Software Foundation; either version 3 of the License, or (at your`
			`# option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful, but`
			`# WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General`
			`# Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License along`
			`# with this program; if not, write to the Free Software Foundation, Inc.,`
			`# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.`

More plotting (#74) * Add a new plot * Add a binned trend * Fix bug * Improve plot further * Add new plotting * add max overlap * edit get_overlap * Add max overlap plot * Update plot * Add max overlap key * add max dist flag * Improve plotting 2023-07-03 16:35:10 +02:00			`import numpy`
			`from scipy.stats import binned_statistic`
New plots (#85) * Update verbosity messages * Update verbosity messags * Update more verbosity flags * Update the iterator settings * Add basic plots * Update verbosity flags * Update arg parsre * Update plots * Remove some older code * Fix some definitions * Update plots * Update plotting * Update plots * Add support functions * Update nb * Improve plots, move back to scripts * Update plots * pep8 * Add max overlap plot * Add blank line * Upload changes * Update changes * Add weighted stats * Remove * Add import * Add Max's matching * Edit submission * Add paths to Max's matching * Fix matching * Edit submission * Edit plot * Add max overlap separation plot * Add periodic distance * Update overlap summaries * Add nsim0 for Max matvhing * Add Max's agreement plot * Add Quijote for Max method * Update ploitting * Update name 2023-08-18 20:20:47 +02:00			`from scipy.special import erf`
More plotting (#74) * Add a new plot * Add a binned trend * Fix bug * Improve plot further * Add new plotting * add max overlap * edit get_overlap * Add max overlap plot * Update plot * Add max overlap key * add max dist flag * Improve plotting 2023-07-03 16:35:10 +02:00
Small updates 2023-06-05 23:28:37 +02:00			`dpi = 600`
Quijote kNN adding (#62) * Fix small bug * Add fiducial observers * Rename 1D knn * Add new bounds system * rm whitespace * Add boudns * Add simname to paths * Add fiducial obserevrs * apply bounds only if not none * Add TODO * add simnames * update script * Fix distance bug * update yaml * Update file reading * Update gitignore * Add plots * add check if empty list * add func to obtaining cross * Update nb * Remove blank lines * update ignroes * loop over a few ics * update gitignore * add comments 2023-05-16 00:30:10 +02:00			`fout = "../plots/"`
Lagrangian patch + HMF calculation (#65) * Rename lagpatch * Fix old bug * Fix small bug * Add number of cells calculation * Fix a small bug * Rename column * Move file * Small changes * Edit style * Add plot script * Add delta2ncells * Add HMF calculation * Move definition around * Add HMF plot * pep8 * Update HMF plotting routine * Small edit 2023-06-01 15:45:52 +02:00			`mplstyle = ["science"]`
Better plots (#73) * Edits paths of saved files * Add upper threshold options * Add upper threshold options * add latex_float option * Add weighted stats * add new plot 2023-06-28 16:22:42 +02:00

			`def latex_float(*floats, n=2):`
			`"""`
			`Convert a float or a list of floats to a LaTeX string(s). Taken from [1].`

			`Parameters`
			`----------`
			`floats : float or list of floats`
			`The float(s) to be converted.`
			`n : int, optional`
			`The number of significant figures to be used in the LaTeX string.`

			`Returns`
			`-------`
			`latex_floats : str or list of str`
			`The LaTeX string(s) representing the float(s).`

			`References`
			`----------`
			`[1] https://stackoverflow.com/questions/13490292/format-number-using-latex-notation-in-python # noqa`
			`"""`
			`latex_floats = [None] * len(floats)`
			`for i, f in enumerate(floats):`
			`float_str = "{0:.{1}g}".format(f, n)`
			`if "e" in float_str:`
			`base, exponent = float_str.split("e")`
			`latex_floats[i] = r"{0} \times 10^{{{1}}}".format(base,`
			`int(exponent))`
			`else:`
			`latex_floats[i] = float_str`

			`if len(floats) == 1:`
			`return latex_floats[0]`
			`return latex_floats`
More plotting (#74) * Add a new plot * Add a binned trend * Fix bug * Improve plot further * Add new plotting * add max overlap * edit get_overlap * Add max overlap plot * Update plot * Add max overlap key * add max dist flag * Improve plotting 2023-07-03 16:35:10 +02:00

New plots (#85) * Update verbosity messages * Update verbosity messags * Update more verbosity flags * Update the iterator settings * Add basic plots * Update verbosity flags * Update arg parsre * Update plots * Remove some older code * Fix some definitions * Update plots * Update plotting * Update plots * Add support functions * Update nb * Improve plots, move back to scripts * Update plots * pep8 * Add max overlap plot * Add blank line * Upload changes * Update changes * Add weighted stats * Remove * Add import * Add Max's matching * Edit submission * Add paths to Max's matching * Fix matching * Edit submission * Edit plot * Add max overlap separation plot * Add periodic distance * Update overlap summaries * Add nsim0 for Max matvhing * Add Max's agreement plot * Add Quijote for Max method * Update ploitting * Update name 2023-08-18 20:20:47 +02:00			`def nan_weighted_average(arr, weights=None, axis=None):`
			`if weights is None:`
			`weights = numpy.ones_like(arr)`
More plotting (#74) * Add a new plot * Add a binned trend * Fix bug * Improve plot further * Add new plotting * add max overlap * edit get_overlap * Add max overlap plot * Update plot * Add max overlap key * add max dist flag * Improve plotting 2023-07-03 16:35:10 +02:00
New plots (#85) * Update verbosity messages * Update verbosity messags * Update more verbosity flags * Update the iterator settings * Add basic plots * Update verbosity flags * Update arg parsre * Update plots * Remove some older code * Fix some definitions * Update plots * Update plotting * Update plots * Add support functions * Update nb * Improve plots, move back to scripts * Update plots * pep8 * Add max overlap plot * Add blank line * Upload changes * Update changes * Add weighted stats * Remove * Add import * Add Max's matching * Edit submission * Add paths to Max's matching * Fix matching * Edit submission * Edit plot * Add max overlap separation plot * Add periodic distance * Update overlap summaries * Add nsim0 for Max matvhing * Add Max's agreement plot * Add Quijote for Max method * Update ploitting * Update name 2023-08-18 20:20:47 +02:00			`valid_entries = ~numpy.isnan(arr)`
More plotting (#74) * Add a new plot * Add a binned trend * Fix bug * Improve plot further * Add new plotting * add max overlap * edit get_overlap * Add max overlap plot * Update plot * Add max overlap key * add max dist flag * Improve plotting 2023-07-03 16:35:10 +02:00
New plots (#85) * Update verbosity messages * Update verbosity messags * Update more verbosity flags * Update the iterator settings * Add basic plots * Update verbosity flags * Update arg parsre * Update plots * Remove some older code * Fix some definitions * Update plots * Update plotting * Update plots * Add support functions * Update nb * Improve plots, move back to scripts * Update plots * pep8 * Add max overlap plot * Add blank line * Upload changes * Update changes * Add weighted stats * Remove * Add import * Add Max's matching * Edit submission * Add paths to Max's matching * Fix matching * Edit submission * Edit plot * Add max overlap separation plot * Add periodic distance * Update overlap summaries * Add nsim0 for Max matvhing * Add Max's agreement plot * Add Quijote for Max method * Update ploitting * Update name 2023-08-18 20:20:47 +02:00			`# Set NaN entries in arr to 0 for computation`
			`arr = numpy.where(valid_entries, arr, 0)`

			`# Set weights of NaN entries to 0`
			`weights = numpy.where(valid_entries, weights, 0)`

			`# Compute the weighted sum and the sum of weights along the axis`
			`weighted_sum = numpy.sum(arr * weights, axis=axis)`
			`sum_weights = numpy.sum(weights, axis=axis)`

			`return weighted_sum / sum_weights`


			`def nan_weighted_std(arr, weights=None, axis=None, ddof=0):`
			`if weights is None:`
			`weights = numpy.ones_like(arr)`

			`valid_entries = ~numpy.isnan(arr)`

			`# Set NaN entries in arr to 0 for computation`
			`arr = numpy.where(valid_entries, arr, 0)`

			`# Set weights of NaN entries to 0`
			`weights = numpy.where(valid_entries, weights, 0)`

			`# Calculate weighted mean`
			`weighted_mean = numpy.sum(`
			`arr * weights, axis=axis) / numpy.sum(weights, axis=axis)`

			`# Calculate the weighted variance`
			`variance = numpy.sum(`
			`weights * (arr - numpy.expand_dims(weighted_mean, axis))**2, axis=axis)`
			`variance /= numpy.sum(weights, axis=axis) - ddof`

			`return numpy.sqrt(variance)`


			`def compute_error_bars(x, y, xbins, sigma):`
			`bin_indices = numpy.digitize(x, xbins)`
			`y_medians = numpy.array([numpy.median(y[bin_indices == i])`
			`for i in range(1, len(xbins))])`

			`lower_pct = 100 * 0.5 * (1 - erf(sigma / numpy.sqrt(2)))`
			`upper_pct = 100 - lower_pct`

			`y_lower = numpy.full(len(y_medians), numpy.nan)`
			`y_upper = numpy.full(len(y_medians), numpy.nan)`

			`for i in range(len(y_medians)):`
			`if numpy.sum(bin_indices == i + 1) == 0:`
			`continue`

			`y_lower[i] = numpy.percentile(y[bin_indices == i + 1], lower_pct)`
			`y_upper[i] = numpy.percentile(y[bin_indices == i + 1], upper_pct)`

			`yerr = (y_medians - numpy.array(y_lower), numpy.array(y_upper) - y_medians)`

			`return y_medians, yerr`


			`def normalize_hexbin(hb):`
			`hexagon_counts = hb.get_array()`
			`normalized_counts = hexagon_counts / hexagon_counts.sum()`
			`hb.set_array(normalized_counts)`
			`hb.set_clim(normalized_counts.min(), normalized_counts.max())`