"""Post-process COMPAS output into compact-binary populations and merger rates.

This module provides the three stages of post-processing: trimming the raw
COMPAS HDF5 output down to parquet tables of the columns of interest
(:class:`COMPASOutputTrimmer`), cross-matching generated ZAMS populations
with the trimmed output (:class:`CompactBinaryPopulation`), and computing
volumetric merger rates from the cross-matched samples and a star formation
rate density grid (:class:`MergerRates`).
"""
import logging
import warnings
import gc
import fnmatch
from concurrent.futures import ProcessPoolExecutor
from time import time, sleep
from datetime import datetime
from pathlib import Path
import inquirer
import numpy as np
import tables as tb
import pandas as pd
#from pandarallel import pandarallel # pandarallel breaks float types
import astropy.constants as ct
import astropy.units as u
from astropy.cosmology import WMAP9 as cosmo
from scipy.integrate import quad
from scipy.optimize import newton
from scipy.interpolate import interp1d
from bossa.sfh import ChruslinskaSFRD
from bossa.imf import Star, IGIMF
from bossa.zams import ZAMSSystemGenerator
from bossa.utils import pull_snmass1, pull_snmass2, chirp_mass, bintype, mass_ratio, create_logger, \
ZOH_to_FeH, format_time
from bossa.constants import LOG_PATH, COMPAS_PROC_OUTPUT_DIR_PATH, COMPAS_WORK_PATH, \
BINARIES_CORRELATED_TABLE_PATH, COMPAS_21XX_PROC_OUTPUT_DIR_PATH, \
COMPAS_12XX_PROC_OUTPUT_DIR_PATH, COMPAS_21XX_GRIDS_PATH, COMPAS_12XX_GRIDS_PATH, \
IGIMF_ZAMS_DIR_PATH, COMPACT_OBJ_DIR_PATH
CO_CODES = [10, 11, 12, 13, 14] # HeWD, COWD, ONeMgWD, NS, BH
PARQUET_SETTINGS = {'engine': 'pyarrow',
'compression': 'snappy',
'partition_cols': ['Mass_ZAMS1', 'LogP_ZAMS'],
'use_threads': True}
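# The trimmed tables are written as a single parquet dataset partitioned on (Mass_ZAMS1, LogP_ZAMS), so
# that the cross-matching step below can cheaply read one (m1, logP) leaf at a time with pyarrow filters.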
MODULE_NAME = __name__
class COMPASOutputTrimmer:
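    """Trim raw COMPAS HDF5 output into a partitioned parquet table.

    Loads every COMPAS_Output*.h5 file in the chosen sample folder, keeps only
    the columns listed in the *_TOLOAD class attributes, computes derived
    columns (chirp mass, total masses, mass ratios, log orbital period, binary
    type) and appends the result to a snappy-compressed parquet dataset
    partitioned on Mass_ZAMS1 and LogP_ZAMS.

    A minimal usage sketch (both option paths below are placeholders):

    >>> trimmer = COMPASOutputTrimmer(Path('output_option'), Path('sample_option'),
    ...                               only_load_dcos=True)  # doctest: +SKIP
    >>> trimmer.trim()  # doctest: +SKIP
    """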
SNTAB_BASE_COLS_TOLOAD = [
'Mass(SN)',
'Mass(CP)',
'Unbound'
]
SNTAB_PSR_COLS_TOLOAD = [
'Mass_Core@CO(SN)',
'Mass_He_Core@CO(SN)',
'Mass_CO_Core@CO(SN)',
'Fallback_Fraction(SN)',
'Kick_Magnitude(uK)',
'SystemicSpeed',
'Time'
]
DCOTAB_COLS_TOLOAD = [
'Mass(1)',
'Mass(2)',
'Coalescence_Time'
]
SYSPARAMTAB_COLS_TOLOAD = [
'Initial_Mass(1)',
'Initial_Mass(2)',
'Mass_Ratio',
'Stellar_Type(1)',
'Stellar_Type(2)',
'Metallicity@ZAMS(1)',
'SemiMajorAxis@ZAMS',
'Eccentricity@ZAMS'
]
PSRTAB_COLS_TOLOAD = [
'MT_History',
'Mass(1)',
'Mass(2)',
'Pulsar_Mag_Field(1)',
'Pulsar_Mag_Field(2)',
'Pulsar_Spin_Down(1)',
'Pulsar_Spin_Down(2)',
'Pulsar_Spin_Freq(1)',
'Pulsar_Spin_Freq(2)',
'SemiMajorAxis',
'Stellar_Type(1)',
'Stellar_Type(2)',
'Time',
'dT'
]
BASE_COLS_TOSAVE = [
'Mass_ZAMS1',
'Mass_ZAMS2',
'Stellar_Type1',
'Stellar_Type2',
'Metallicity_ZAMS',
'SemiMajorAxis_ZAMS',
'Eccentricity_ZAMS',
'Mass_PostSN1',
'Mass_PostSN2',
'Coalescence_Time',
'SEED',
'Chirp_Mass',
'Total_Mass_PostSN',
'Total_Mass_ZAMS',
'Mass_Ratio_PostSN',
'Mass_Ratio_ZAMS',
'LogP_ZAMS',
'Binary_Type'
]
PSR_SN_DOUBLE_COLS_TOSAVE = [
'PSR_SN_Mass_Core1',
'PSR_SN_Mass_Core2',
'PSR_SN_Mass_He_Core1',
'PSR_SN_Mass_He_Core2',
'PSR_SN_Mass_CO_Core1',
'PSR_SN_Mass_CO_Core2',
'PSR_SN_Fallback_Fraction1',
'PSR_SN_Fallback_Fraction2',
'PSR_SN_Kick_Magnitude1',
'PSR_SN_Kick_Magnitude2',
'PSR_SN_SystemicSpeed1',
'PSR_SN_SystemicSpeed2',
]
PSR_DO_COLS_TOSAVE = [
'PSR_DO_PreSN_Time',
'PSR_DO_PreSN_Omega1',
'PSR_DO_PreSN_Omega2',
'PSR_DO_PreSN_Radius1',
'PSR_DO_PreSN_Radius2'
]
PSR_COLS_TOSAVE = BASE_COLS_TOSAVE + [
'PSR_MT_History',
'PSR_Mass1',
'PSR_Mass2',
'PSR_B1',
'PSR_B2',
'PSR_Pdot1',
'PSR_Pdot2',
'PSR_Omega1',
'PSR_Omega2',
'PSR_SemiMajorAxis',
'PSR_Type1',
'PSR_Type2',
'PSR_Time',
'PSR_dT',
] + PSR_SN_DOUBLE_COLS_TOSAVE
def __init__(self, compas_output_option, compas_sample_option, only_load_dcos=True, load_unbound_systems=False,
save_pulsar_columns=False, parent_logger=None):
self.compas_output_path = Path(COMPAS_WORK_PATH, compas_output_option, compas_sample_option)
self.trimmed_output_path = Path(COMPAS_PROC_OUTPUT_DIR_PATH, compas_output_option.name + '.snappy.parquet')
self.input_paths = list()
self.only_load_dcos = only_load_dcos
self.load_unbound_systems = load_unbound_systems
self._save_pulsar_columns = self._set_save_pulsar_columns(save_pulsar_columns)
self._columns_to_load = None
self._columns_to_save = None
self._float32_cols = None
self.logger = self._get_logger(parent_logger)
@property
def columns_to_save(self):
if self._columns_to_save is None:
if self._save_pulsar_columns:
self._columns_to_save = self.PSR_COLS_TOSAVE
else:
self._columns_to_save = self.BASE_COLS_TOSAVE
return self._columns_to_save
@property
def float32_cols(self):
if self._float32_cols is None:
self._float32_cols = list(col for col in self.columns_to_save if col not in [
'Stellar_Type1',
'Stellar_Type2',
'SEED',
'Binary_Type',
'PSR_MT_History',
'PSR_Type1',
'PSR_Type2'
])
return self._float32_cols
@property
def columns_to_load(self):
if self._columns_to_load is None:
if self.only_load_dcos:
self._columns_to_load = self.SYSPARAMTAB_COLS_TOLOAD + self.DCOTAB_COLS_TOLOAD
elif self._save_pulsar_columns:
self._columns_to_load = (self.SYSPARAMTAB_COLS_TOLOAD + self.DCOTAB_COLS_TOLOAD +
self.SNTAB_BASE_COLS_TOLOAD + self.SNTAB_PSR_COLS_TOLOAD +
self.PSRTAB_COLS_TOLOAD)
self._columns_to_load = list(set(self._columns_to_load))
else:
self._columns_to_load = (self.SYSPARAMTAB_COLS_TOLOAD + self.DCOTAB_COLS_TOLOAD +
self.SNTAB_BASE_COLS_TOLOAD)
return self._columns_to_load
def _set_save_pulsar_columns(self, save_pulsar_columns):
if save_pulsar_columns is True and self.only_load_dcos is True:
raise ValueError('save_pulsar_columns was passed as True but only_load_dcos is also True.')
else:
return save_pulsar_columns
def _get_logger(self, parent_logger):
if parent_logger is None:
loggername = '.'.join([__name__, self.__class__.__name__])
log_path = Path(LOG_PATH, loggername, datetime.now().strftime('%d-%m-%Y_%H:%M:%S.log'))
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = create_logger(name=loggername, fpath=log_path, propagate=False)
else:
loggername = '.'.join([__name__, self.__class__.__name__])
logger = logging.getLogger(name=loggername)
logger.setLevel(logging.DEBUG)
return logger
def _set_paths(self):
self.input_paths = list(self.compas_output_path.glob('COMPAS_Output*.h5'))
def _get_psr_sn_double_cols_dict(self, no=1):
no = str(no)
col_dict = {
'Mass_Core@CO(SN)': f'PSR_SN_Mass_Core{no}',
'Mass_He_Core@CO(SN)': f'PSR_SN_Mass_He_Core{no}',
'Mass_CO_Core@CO(SN)': f'PSR_SN_Mass_CO_Core{no}',
}
return col_dict
def _get_df_from_hdf(self, hdf_path):
self.logger.debug(f'Attempting to load {hdf_path}')
self.logger.debug(f'Columns to load: {self.columns_to_load}')
empty = False
try:
table = tb.open_file(hdf_path, 'r')
except tb.HDF5ExtError:
empty = True
if empty:
self.logger.error(f'File {hdf_path} empty')
            return None, None
else:
try:
sysparam_tab = table.root.BSE_System_Parameters
except tb.NoSuchNodeError:
self.logger.error(f'No BSE_System_Parameters table in {hdf_path}.')
print(f'No BSE_System_Parameters table in {hdf_path}.')
                table.close()
                return None, None
else:
self.logger.debug(f'Now loading {hdf_path}.')
start_time = time()
sort_sysparam_tab = np.argsort(sysparam_tab.SEED)
full_seed_arr = np.array(sysparam_tab.SEED)[sort_sysparam_tab]
samplesize = len(full_seed_arr)
sample_metallicity = sysparam_tab['Metallicity@ZAMS(1)'][0]
sample_m1 = sysparam_tab['Initial_Mass(1)'][0]
self.logger.info(f'Table is valid, with M1 = {sample_m1} and Z = {sample_metallicity}.')
if self.only_load_dcos:
dcos_present = True
sns_present = True
psrs_present = True
try:
dco_tab = table.root.BSE_Double_Compact_Objects
except tb.NoSuchNodeError:
dcos_present = False
sns_present = False
psrs_present = False
self.logger.info('No DCOs produced.')
else:
sort_dco_tab = np.argsort(dco_tab.SEED)
dco_seed_arr = np.array(dco_tab.SEED)[sort_dco_tab]
sn_tab = table.root.BSE_Supernovae
sort_sn_tab = np.lexsort((sn_tab.Time, sn_tab.SEED))
sn_full_seed_arr = np.array(sn_tab.SEED)[sort_sn_tab]
if self.load_unbound_systems:
sn_partial_seed_arr = sn_full_seed_arr
else:
sn_bound_mask = np.invert(np.array(sn_tab.Unbound)[sort_sn_tab].astype(bool))
sn_partial_seed_arr = sn_full_seed_arr[sn_bound_mask]
sn_bco_mask = np.isin(sn_partial_seed_arr, dco_seed_arr)
sn_seed_arr = sn_partial_seed_arr[sn_bco_mask]
first_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='left') + 1)
second_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='right') - 1)
if self._save_pulsar_columns:
try:
psr_tab = table.root.BSE_Pulsar_Evolution
except tb.NoSuchNodeError:
psrs_present = False
self.logger.info('No PSRs formed.')
else:
sort_psr_tab = np.lexsort((psr_tab.Time, psr_tab.SEED))
psr_partial_seed_arr = np.array(psr_tab.SEED)[sort_psr_tab]
psr_bco_mask = np.isin(psr_partial_seed_arr, dco_seed_arr)
psr_seed_arr = psr_partial_seed_arr[psr_bco_mask]
else:
dcos_present = True
sns_present = True
psrs_present = True
try:
dco_tab = table.root.BSE_Double_Compact_Objects
except tb.NoSuchNodeError:
dcos_present = False
self.logger.info('No DCOs produced.')
else:
sort_dco_tab = np.argsort(dco_tab.SEED)
dco_seed_arr = np.array(dco_tab.SEED)[sort_dco_tab]
sn_tab = table.root.BSE_Supernovae
sort_sn_tab = np.lexsort((sn_tab.Time, sn_tab.SEED))
sn_full_seed_arr = np.array(sn_tab.SEED)[sort_sn_tab]
if self.load_unbound_systems:
sn_seed_arr = sn_full_seed_arr
else:
sn_bound_mask = np.invert(np.array(sn_tab.Unbound)[sort_sn_tab].astype(bool))
sn_seed_arr = sn_full_seed_arr[sn_bound_mask]
first_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='left') + 1)
second_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='right') - 1)
if not dcos_present:
try:
sn_tab = table.root.BSE_Supernovae
except tb.NoSuchNodeError:
sns_present = False
psrs_present = False
self.logger.info('No SNs occurred.')
else:
sort_sn_tab = np.lexsort((sn_tab.Time, sn_tab.SEED))
sn_full_seed_arr = np.array(sn_tab.SEED)[sort_sn_tab]
if self.load_unbound_systems:
sn_seed_arr = sn_full_seed_arr
else:
sn_bound_mask = np.invert(np.array(sn_tab.Unbound)[sort_sn_tab].astype(bool))
sn_seed_arr = sn_full_seed_arr[sn_bound_mask]
first_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='right') - 1)
if sns_present and self._save_pulsar_columns:
try:
psr_tab = table.root.BSE_Pulsar_Evolution
except tb.NoSuchNodeError:
psrs_present = False
self.logger.info('No PSRs formed.')
else:
#sort_psr_tab = np.lexsort((psr_tab.Time, psr_tab.SEED))
sort_psr_tab = np.argsort(psr_tab.SEED)
psr_seed_arr = np.array(psr_tab.SEED)[sort_psr_tab]
df = pd.DataFrame(columns=self.columns_to_load, dtype=np.float32)
if self._save_pulsar_columns:
psr_df = pd.DataFrame(columns=self.columns_to_load, dtype=np.float32)
else:
psr_df = None
self.logger.debug('Loading System Parameters table...')
for col in self.SYSPARAMTAB_COLS_TOLOAD:
df[col] = np.array(sysparam_tab[col], np.float32)[sort_sysparam_tab]
if dcos_present:
self.logger.debug('Loading Double Compact Objects table...')
fullsample_indices = np.searchsorted(full_seed_arr, dco_seed_arr)
for col in self.DCOTAB_COLS_TOLOAD:
col_arr = np.zeros(samplesize, np.float32)
col_arr[fullsample_indices] = np.array(dco_tab[col])[sort_dco_tab]
df[col] = col_arr
else:
for col in self.DCOTAB_COLS_TOLOAD:
df[col] = np.zeros(samplesize, np.float32)
if sns_present:
self.logger.debug('Loading Supernovae table...')
fullsample_indices = np.searchsorted(full_seed_arr, sn_seed_arr[first_sn_valid_indices])
for col in self.SNTAB_BASE_COLS_TOLOAD:
if col == 'Unbound':
col_arr = np.ones(samplesize, np.float32)
else:
col_arr = np.zeros(samplesize, np.float32)
col_arr[fullsample_indices] = np.array(sn_tab[col])[sort_sn_tab][first_sn_valid_indices]
df[col] = col_arr
if self._save_pulsar_columns:
self.logger.debug('Loading pulsar columns from the Supernovae table...')
if psrs_present:
first_sn_fullsample_indices = np.searchsorted(full_seed_arr,
sn_seed_arr[first_sn_valid_indices])
if dcos_present:
second_sn_fullsample_indices = np.searchsorted(full_seed_arr,
sn_seed_arr[second_sn_valid_indices])
for col in self.SNTAB_PSR_COLS_TOLOAD:
col_arr1 = np.zeros(samplesize, np.float32)
col_arr2 = np.zeros(samplesize, np.float32)
col_arr1[first_sn_fullsample_indices] = (
np.array(sn_tab[col]))[sort_sn_tab][first_sn_valid_indices]
psr_df[col + '1'] = col_arr1
if dcos_present:
col_arr2[second_sn_fullsample_indices] = (
np.array(sn_tab[col]))[sort_sn_tab][second_sn_valid_indices]
psr_df[col + '2'] = col_arr2
else:
for col in self.SNTAB_PSR_COLS_TOLOAD:
psr_df[col + '1'] = np.zeros(samplesize, np.float32)
if dcos_present:
psr_df[col + '2'] = np.zeros(samplesize, np.float32)
else:
for col in self.SNTAB_BASE_COLS_TOLOAD:
df[col] = np.zeros(samplesize, np.float32)
if self._save_pulsar_columns:
for col in self.SNTAB_PSR_COLS_TOLOAD:
psr_df[col + '1'] = np.zeros(samplesize, np.float32)
if dcos_present:
psr_df[col + '2'] = np.zeros(samplesize, np.float32)
if self._save_pulsar_columns:
if psr_df is None:
psr_df = pd.DataFrame(columns=self.columns_to_load, dtype=np.float32)
if psrs_present:
self.logger.debug('Loading Pulsars table...')
fullsample_indices = np.searchsorted(full_seed_arr, psr_seed_arr)
for col in self.PSRTAB_COLS_TOLOAD:
col_arr = np.zeros(samplesize, np.float32)
col_arr[fullsample_indices] = np.array(psr_tab[col])[sort_psr_tab]
psr_df[col] = col_arr
else:
for col in self.PSRTAB_COLS_TOLOAD:
psr_df[col] = np.zeros(samplesize, np.float32)
table.close()
self.logger.debug('Fixing seeds...')
new_seed_arr = np.empty(samplesize, object)
for i, seed in enumerate(full_seed_arr):
new_seed_arr[i] = f'{seed}_1e6Z_{1e6 * sample_metallicity:.0f}_1e3M1_{1e3 * sample_m1:.0f}'
df['SEED'] = new_seed_arr
total_time = time() - start_time
self.logger.debug(f'Done loading table {hdf_path.name}. Elapsed time: {total_time:.6f} s.')
return df, psr_df
def _process_df(self, df, psr_df=None):
self.logger.debug('Processing table...')
start_time = time()
self.logger.debug('Fixing final masses...')
# df.parallel_apply(symmetrize_masses, axis=1)
df['Mass_PostSN1'] = df.apply(pull_snmass1, axis=1)
df['Mass_PostSN2'] = df.apply(pull_snmass2, axis=1)
df = df.drop(['Mass(SN)', 'Mass(CP)', 'Mass(1)', 'Mass(2)'], axis=1)
self.logger.debug('Computing new mass columns...')
df['Chirp_Mass'] = df.apply(chirp_mass, axis=1)
df['Total_Mass_PostSN'] = df['Mass_PostSN1'] + df['Mass_PostSN2']
df['Initial_Mass(2)'] = df['Initial_Mass(1)'] * df['Mass_Ratio']
df['Total_Mass_ZAMS'] = df['Initial_Mass(1)'] + df['Initial_Mass(2)']
df['Mass_Ratio_PostSN'] = df.apply(lambda x: mass_ratio(x['Mass_PostSN1'], x['Mass_PostSN2']), axis=1)
# df['Mass_Ratio_ZAMS'] = df.apply(lambda x: mass_ratio(x['Initial_Mass(1)'], x['Initial_Mass(2)']), axis=1)
df['Coalescence_Time'] = np.float32(1e6) * df['Coalescence_Time'] # time in years
self.logger.debug('Computing ZAMS orbital period...')
        df['LogP_ZAMS'] = np.zeros(len(df), np.float32)
        logp_finder = LogPFinder()
        for m1 in df['Initial_Mass(1)'].unique():
            logp_finder.set_m1(float(m1))
            subdf = df[df['Initial_Mass(1)'] == m1]
            # Assign through df.loc to avoid pandas' chained-assignment trap, which can silently write to a copy.
            df.loc[subdf.index, 'LogP_ZAMS'] = subdf.apply(
                lambda x: np.float32(logp_finder.a_to_logp_table(x['SemiMajorAxis@ZAMS'],
                                                                 x['Total_Mass_ZAMS'])), axis=1)
self.logger.debug('Fixing categorical columns and column names...')
df['Stellar_Type(1)'] = df['Stellar_Type(1)'].astype(np.int8)
df['Stellar_Type(2)'] = df['Stellar_Type(2)'].astype(np.int8)
df['Binary_Type'] = (df['Stellar_Type(1)'].apply(str) + '+' + df['Stellar_Type(2)'].apply(str)).apply(bintype)
df['Stellar_Type(1)'] = df['Stellar_Type(1)'].astype('category')
df['Stellar_Type(2)'] = df['Stellar_Type(2)'].astype('category')
df['Binary_Type'] = df['Binary_Type'].astype('category')
df.rename(columns={
'Initial_Mass(1)': 'Mass_ZAMS1',
'Initial_Mass(2)': 'Mass_ZAMS2',
'Mass_Ratio': 'Mass_Ratio_ZAMS',
'Stellar_Type(1)': 'Stellar_Type1',
'Stellar_Type(2)': 'Stellar_Type2',
'Metallicity@ZAMS(1)': 'Metallicity_ZAMS',
'SemiMajorAxis@ZAMS': 'SemiMajorAxis_ZAMS',
'Eccentricity@ZAMS': 'Eccentricity_ZAMS',
}, inplace=True)
if self._save_pulsar_columns and psr_df is not None:
psr_col_rename_dict = {
'MT_History': 'PSR_MT_History',
'Mass(1)': 'PSR_Mass1',
'Mass(2)': 'PSR_Mass2',
'Pulsar_Mag_Field(1)': 'PSR_B1',
'Pulsar_Mag_Field(2)': 'PSR_B2',
'Pulsar_Spin_Down(1)': 'PSR_Pdot1',
'Pulsar_Spin_Down(2)': 'PSR_Pdot2',
'Pulsar_Spin_Freq(1)': 'PSR_Omega1',
'Pulsar_Spin_Freq(2)': 'PSR_Omega2',
'SemiMajorAxis': 'PSR_SemiMajorAxis',
'Stellar_Type(1)': 'PSR_Type1',
'Stellar_Type(2)': 'PSR_Type2',
'Time': 'PSR_Time',
'dT': 'PSR_dT'
}
psr_sn_col_rename_dict = {
'Mass_Core@CO(SN)1': 'PSR_SN_Mass_Core1',
'Mass_Core@CO(SN)2': 'PSR_SN_Mass_Core2',
'Mass_He_Core@CO(SN)1': 'PSR_SN_Mass_He_Core1',
'Mass_He_Core@CO(SN)2': 'PSR_SN_Mass_He_Core2',
'Mass_CO_Core@CO(SN)1': 'PSR_SN_Mass_CO_Core1',
'Mass_CO_Core@CO(SN)2': 'PSR_SN_Mass_CO_Core2',
'Fallback_Fraction(SN)1': 'PSR_SN_Fallback_Fraction1',
'Fallback_Fraction(SN)2': 'PSR_SN_Fallback_Fraction2',
'Kick_Magnitude(uK)1': 'PSR_SN_Kick_Magnitude1',
'Kick_Magnitude(uK)2': 'PSR_SN_Kick_Magnitude2',
'SystemicSpeed1': 'PSR_SN_SystemicSpeed1',
'SystemicSpeed2': 'PSR_SN_SystemicSpeed2',
'Time1': 'PSR_SN_Time1',
'Time2': 'PSR_SN_Time2'
}
psr_new_cols = [psr_col_rename_dict[old_col] for old_col in psr_col_rename_dict]
psr_new_cols += [psr_sn_col_rename_dict[old_col] for old_col in psr_sn_col_rename_dict]
psr_df.rename(columns=psr_col_rename_dict, inplace=True)
psr_df.rename(columns=psr_sn_col_rename_dict, inplace=True)
df.rename(columns=psr_col_rename_dict, inplace=True)
df.rename(columns=psr_sn_col_rename_dict, inplace=True)
for col in psr_new_cols:
if col in psr_df.columns:
df[col] = psr_df[col]
del psr_df
gc.collect()
        elif self._save_pulsar_columns:
            raise ValueError('_save_pulsar_columns is True, but psr_df is None.')
        # The pulsar SN columns only exist in df when pulsar columns were loaded; ignore them otherwise.
        df.drop(labels=self.SNTAB_PSR_COLS_TOLOAD, axis=1, inplace=True, errors='ignore')
# pandarallel turns all float32 cols to float64, so we have to convert them back to float32
for col in self.float32_cols:
df[col] = pd.to_numeric(df[col], downcast='float')
elapsed_time = time() - start_time
self.logger.debug(f'Done. Table processed in {elapsed_time} s.')
return df
def gen_subdfs(self, df):
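        """Split df into all systems containing at least one compact object, the bound subset thereof,
        and the bound subset containing only NSs and BHs.
        """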
all_soft_cos_df = df[df['Stellar_Type1'].isin(CO_CODES) | df['Stellar_Type2'].isin(CO_CODES)]
bound_soft_cos_df = all_soft_cos_df[all_soft_cos_df['Unbound'] == 0]
bound_hard_cos_df = bound_soft_cos_df[bound_soft_cos_df['Stellar_Type1'].isin([13, 14]) & \
bound_soft_cos_df['Stellar_Type2'].isin([13, 14])]
return all_soft_cos_df, bound_soft_cos_df, bound_hard_cos_df
def trim(self):
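        """Trim every COMPAS_Output*.h5 file found under compas_output_path and append the results to
        the trimmed parquet dataset at trimmed_output_path.
        """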
self.logger.info('Initializing trimmer...')
self._set_paths()
#pandarallel.initialize()
total_time0 = time()
for path in self.input_paths:
start_time = time()
df, psr_df = self._get_df_from_hdf(path)
if df is None:
warnings.warn('No data loaded from HDF file, stopping trimmer.')
return
else:
df = self._process_df(df, psr_df)
write_time0 = time()
self.logger.info(f'Writing to {self.trimmed_output_path}.')
df.to_parquet(path=self.trimmed_output_path, **PARQUET_SETTINGS)
write_time = time() - write_time0
self.logger.debug(f'Done. Table written in {write_time} s.')
table_time = time() - start_time
self.logger.debug(f'Total table processing time: {table_time} s.')
total_time = time() - total_time0
self.logger.info(f'Done trimming {self.compas_output_path.stem}. Total elapsed time: {total_time} s.')
class LogPFinder:
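    """Find the closest tabled log10(orbital period) for a given semi-major axis.

    Wraps a ZAMSSystemGenerator in order to look up, for a given primary mass,
    the logP columns available in the correlated-binaries sampling table, and
    converts a semi-major axis and total mass to the closest tabled logP.
    """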
def __init__(self):
self.zams = ZAMSSystemGenerator(np.linspace(0.1, 150, 100), BINARIES_CORRELATED_TABLE_PATH)
self.zams.setup_sampler()
self.m1group = None
self.logp_options = None
self.logp_categories = None
def set_m1(self, m1):
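        """Point the finder at primary mass m1 and cache the logP options available for it in the sampling table."""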
self.zams.m1_array = m1
self.m1group = self.zams._get_m1()[1]
self.logp_options = []
self.logp_categories = []
for group in self.m1group._v_children:
self.logp_options.append(np.float32(self.m1group[group].title))
self.logp_categories.append(self.m1group[group].title)
self.logp_options = np.array(self.logp_options)
self.logp_categories = np.array(self.logp_categories)
sorting = np.argsort(self.logp_options)
self.logp_options = self.logp_options[sorting]
self.logp_categories = self.logp_categories[sorting]
def get_closest_logp(self, logp):
closest_logp_i = np.argmin(np.abs(self.logp_options - logp))
closest_logp = self.logp_categories[closest_logp_i]
return closest_logp
def a_to_logp_table(self, a, m_tot):
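        """Convert a semi-major axis to the closest tabled log10(orbital period).

        Applies Kepler's third law, P = 2*pi*sqrt(a**3 / (G*M)), with the
        semi-major axis a in AU and the total mass M in solar masses, and
        returns the tabled log10(P/d) closest to the result.
        """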
cgs_a = (a * u.au).to(u.cm)
g = np.sqrt(4 * np.pi ** 2 / (ct.G.cgs * m_tot * ct.M_sun))
cgs_p = g * cgs_a ** (3 / 2)
logp = np.log10(cgs_p.to(u.d).value)
logp_table = self.get_closest_logp(logp)
# logp_table = np.around(logp, 4)
return logp_table
class CompactBinaryPopulation:
"""Cross-match generated ZAMS populations and COMPAS output to produce a binary compact object population."""
def __init__(self, print_progress=True, parent_logger=None, canonical_distributions=False):
self.zams_population_path = None
self.compas_feh_dict = dict()
self.compas_feh_options = None
self.zams_redshift_feh_dict = dict()
self.n_processes = None
self.crossmatch_tol = 1e-5
self.print_progress = print_progress
self.canonical_initial_distributions = canonical_distributions
self._compas_proc_output_dir_path = None
self._compas_grids_folder = None
self.logger = self._get_logger(parent_logger)
@property
def compas_proc_output_dir_path(self):
if self._compas_proc_output_dir_path is None:
if self.canonical_initial_distributions:
self._compas_proc_output_dir_path = COMPAS_21XX_PROC_OUTPUT_DIR_PATH
else:
self._compas_proc_output_dir_path = COMPAS_12XX_PROC_OUTPUT_DIR_PATH
return self._compas_proc_output_dir_path
@property
def compas_grids_folder(self):
if self._compas_grids_folder is None:
if self.canonical_initial_distributions:
self._compas_grids_folder = COMPAS_21XX_GRIDS_PATH
else:
self._compas_grids_folder = COMPAS_12XX_GRIDS_PATH
return self._compas_grids_folder
def _get_logger(self, parent_logger):
if parent_logger is None:
loggername = '.'.join([__name__, self.__class__.__name__])
log_path = Path(LOG_PATH, loggername, datetime.now().strftime('%d-%m-%Y_%H:%M:%S.log'))
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = create_logger(name=loggername, fpath=log_path, propagate=False)
else:
loggername = '.'.join([__name__, self.__class__.__name__])
logger = logging.getLogger(name=loggername)
logger.setLevel(logging.DEBUG)
return logger
def _get_compas_samples(self):
"""Load COMPAS output paths and corresponding metallicities."""
for sample_folder in self.compas_proc_output_dir_path.glob('*.parquet'):
feh = float(sample_folder.stem.split('=')[1].split('.')[0])/1e2
self.compas_feh_dict[feh] = sample_folder
self.compas_feh_options = np.array(sorted(self.compas_feh_dict.keys()))
def _get_zams_samples(self):
"""Load ZAMS sample paths and corresponding redshift, metallicity and other parameters."""
for sample_file in IGIMF_ZAMS_DIR_PATH.glob('z=*_feh=*_logsfr=*_1e4ZOH=*_1e2Mgal=*_1e2logn=*_logpool=*_' \
'igimf_zams_sample.parquet'):
z, feh, logsfr, zoh, mgal, logn, logpool, *_ = sample_file.stem.split('_')
z = float(z.split('=')[1]) # sample redshift
feh = float(feh.split('=')[1]) # sample [Fe/H]
logsfr = float(logsfr.split('=')[1]) # sample log10(SFR)
zoh = str(float(zoh.split('=')[1])/1e4) # sample Z_O/H
mgal = str(float(mgal.split('=')[1])/1e2) # sample galaxy stellar mass
logn = str(float(logn.split('=')[1])/1e2) # sample log10(galaxy number density)
logpool = float(logpool.split('=')[1]) # sample log10(mass pool size)
if z not in self.zams_redshift_feh_dict:
self.zams_redshift_feh_dict[z] = dict()
sample_params = {'redshift' : z,
'feh' : feh,
'logsfr' : logsfr,
'zoh' : zoh,
'mgal' : mgal,
'logn' : logn,
'logpoolsize' : logpool,
'path' : sample_file
}
self.zams_redshift_feh_dict[z][feh] = sample_params
def _crossmatch_df(self, zams_df, compas_sample_path, feh):
"""Cross-match a dataframe holding the ZAMS population with the nearest metallicity COMPAS output."""
# First, set up arrays to hold columns of the final cross-matched dataframe.
sample_size = len(zams_df)
m1_col = np.zeros(sample_size, np.float32)
m2_col = np.zeros(sample_size, np.float32)
coalescence_t_col = np.zeros(sample_size, np.float32)
unbound_col = np.ones(sample_size, np.float32) # whether the binary became unbound following a supernova (bool)
seed_col = np.empty(sample_size, object) # unique identifier from binary parameters
binary_type_col = np.empty(sample_size, object) # BHBH, BHNS, NSBH, NSNS or isolated (for mass accounting)
prev_progress = 0
progress = 0
hit_count = 0 # successful matches counter
iteration_times = [] # for progress updates to stdout
matched_df_i = 0 # index
for zams_i, row in zams_df.iterrows():
time0 = time()
# In the ZAMS sample, the 'Found' m1, as well as LogP, 'Found' q and e, are values taken directly from the
# same sampling table from which the initial binary grid for COMPAS is built, allowing for an exact match
# within the table's original float32 precision.
zams_m1 = np.float32(row['Mass_ZAMS1_Found'])
zams_logp = np.float32(row['LogOrbitalPeriod_ZAMS'])
zams_q = np.float32(row['MassRatioFound_ZAMS'])
zams_e = np.float32(row['Eccentricity_ZAMS'])
isolated_star = False
unevolved_binary = False
if zams_logp == 0.0:
isolated_star = True
elif zams_m1*zams_q < 0.1:
unevolved_binary = True
if isolated_star:
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound' : False,
'Coalescence_Time' : 0,
'SEED' : 'isolated',
'Binary_Type' : 'isolated'}
match_df = pd.DataFrame(match_dict, index=[0])
elif unevolved_binary:
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound': False,
'Coalescence_Time': 0,
'SEED': 'unevolved',
'Binary_Type': 'unevolved'}
match_df = pd.DataFrame(match_dict, index=[0])
else:
                # The COMPAS sample files are structured hierarchically: each first-level folder corresponds to
                # an m1, each second-level folder to an (m1, logP) pair, and each third-level file to an
                # (m1, logP, q, e) set. The m1 and logP are stored in the folder names, against which the
                # ZAMS binaries are matched.
compas_path_options = list(compas_sample_path.glob('*'))
m1_options = np.array([np.float32(path.name.split('=')[1]) for path in compas_path_options])
                # Finding the closest parameters, instead of requiring an exact match, avoids possible
                # floating-point mismatches.
m1path_closest_i = np.argmin(np.abs(m1_options - zams_m1))
compas_m1_path = compas_path_options[m1path_closest_i]
logp_path_options = list(compas_m1_path.glob('*'))
logp_options = np.array([np.float32(path.name.split('=')[1]) for path in logp_path_options])
logp_path_closest_i = np.argmin(np.abs(logp_options - zams_logp))
compas_logp_path = logp_path_options[logp_path_closest_i]
tol = self.crossmatch_tol
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Eccentricity_ZAMS', '<=', zams_e + np.float32(tol)),
('Eccentricity_ZAMS', '>=', zams_e - np.float32(tol)),
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(tol)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(tol))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
time1 = time() - time0
iteration_times.append(time1)
tolerance_warning = True
while match_df.SEED.nunique() > 1:
if self.canonical_initial_distributions:
# Because in this case e=0 always, for a given (m1,logp,q) there will be 10 repeated (q,e=0) lines
# in the table, which get passed to COMPAS.
match_qs = match_df.Mass_Ratio_ZAMS.unique()
match_es = match_df.Eccentricity_ZAMS.unique()
if len(match_qs) == 1 and len(match_es) == 1:
match_df = match_df.iloc[[0]]
continue
if tolerance_warning:
print(f'Lowering tolerance for {zams_m1, zams_logp, zams_q, zams_e} in {compas_logp_path}')
tolerance_warning = False
tol *= 0.9
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Eccentricity_ZAMS', '<=', zams_e + np.float32(tol)),
('Eccentricity_ZAMS', '>=', zams_e - np.float32(tol)),
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(tol)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(tol))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
if tol <= 1e-8:
print('Tolerance too low, checking gridfiles...')
gridfile_folder = Path(self.compas_grids_folder, compas_sample_path.name.split('.')[0],
'run_gridfiles')
m1_gridfile_options = list(gridfile_folder.glob('*_grid.txt'))
m1_grid_options = list(file.name.split('_')[1].split('=')[1] for file in m1_gridfile_options)
m1 = np.float32(compas_m1_path.name.split('=')[1])
closest_m1_i = np.argmin(np.abs(np.float32(m1_grid_options) - m1))
m1_grid = m1_gridfile_options[closest_m1_i]
match_seeds = match_df.SEED.to_numpy()
seed_suffix = match_seeds[0].split('_')[1:]
match_seeds = [seed.split('_')[0] for seed in match_seeds]
print(f'Looking for {match_seeds} in {m1_grid}')
                    def look_for_seeds(match_seeds):
                        match_lines = []
                        with m1_grid.open('r') as f:
                            for line in f:
                                # Iterate over a copy: matched seeds are removed from the list below.
                                for seed in list(match_seeds):
                                    if seed in line:
                                        match_lines.append([seed, line])
                                        match_seeds.remove(seed)
                                if len(match_seeds) == 0:
                                    return match_lines
                        return match_lines
                    match_lines = look_for_seeds(match_seeds)
matched_seed = 0
prev_d_logp = 10
for seed_line in match_lines:
seed, line = seed_line
settings = line.split(' ')
logp_i = settings.index('--orbital-period')
logp = np.log10(np.float32(settings[logp_i+1]))
d_logp = np.abs(logp - zams_logp)
if d_logp < prev_d_logp:
prev_d_logp = d_logp
matched_seed = seed
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('SEED', '==', '_'.join([matched_seed, *seed_suffix]))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
if match_df.SEED.nunique() == 0:
full_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
columns=[
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
if len(full_df.Eccentricity_ZAMS.unique()) == 1 and float(full_df.Eccentricity_ZAMS.unique()[0]) == 0.0:
max_q = max(full_df.Mass_Ratio_ZAMS.unique())
if zams_q > max_q:
print('Close massive binary')
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound': False,
'Coalescence_Time': 0,
'SEED': 'merged_at_birth',
'Binary_Type': 'merged_at_birth'}
match_df = pd.DataFrame(match_dict, index=[0])
else:
full_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(tol)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(tol))
],
columns=[
'Eccentricity_ZAMS'
],
use_threads=True
)
if zams_e > max(full_df.Eccentricity_ZAMS.unique()):
print('Eccentric massive binary')
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound': False,
'Coalescence_Time': 0,
'SEED': 'merged_at_birth',
'Binary_Type': 'merged_at_birth'}
match_df = pd.DataFrame(match_dict, index=[0])
if match_df.SEED.nunique() == 1:
hit_count += 1
match = match_df.iloc[0]
m1_col[matched_df_i] = match['Mass_PostSN1']
m2_col[matched_df_i] = match['Mass_PostSN2']
coalescence_t_col[matched_df_i] = match['Coalescence_Time']
unbound_col[matched_df_i] = match['Unbound']
seed_col[matched_df_i] = match['SEED']
binary_type_col[matched_df_i] = match['Binary_Type']
matched_df_i += 1
progress = 100*matched_df_i/sample_size
elif match_df.SEED.nunique() == 0:
print(zams_m1, zams_logp, zams_q, np.float32(row['MassRatioChoice_ZAMS']), zams_e)
print(compas_logp_path)
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Eccentricity_ZAMS', '<=', zams_e + np.float32(5e-1)),
('Eccentricity_ZAMS', '>=', zams_e - np.float32(5e-1)),
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(5e-1)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(5e-1))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
print(match_df)
print(len(match_df), 'LEN')
print('q,e pairs', np.array([match_df.Mass_Ratio_ZAMS.to_numpy(),
match_df.Eccentricity_ZAMS.to_numpy()]).T)
warnings.warn('Could not match system. Consider raising parameter match tolerances.')
break
else:
print(zams_m1, zams_logp, zams_q, zams_e)
print(match_df)
warnings.warn('Multiple matches found in COMPAS sample. Consider lowering parameter match tolerances.')
break
if self.print_progress and progress > prev_progress+1:
elapsed_time = np.sum(iteration_times)
avg_match_rate = matched_df_i/elapsed_time
expected_time = (sample_size-matched_df_i) / avg_match_rate
print(f'Progress: {progress:.4f} % {format_time(elapsed_time)}<'
f'{format_time(expected_time)} at {avg_match_rate:.2f} match / s')
prev_progress = progress
# As zams_df already holds the initial parameter columns, simply add the final parameter columns to it.
zams_df['Mass_PostSN1'] = m1_col
zams_df['Mass_PostSN2'] = m2_col
zams_df['Coalescence_Time'] = coalescence_t_col
zams_df['Unbound'] = unbound_col
zams_df['SEED'] = seed_col
zams_df['Binary_Type'] = binary_type_col
return zams_df
def _pick_sample(self):
sample_options = []
for z in self.zams_redshift_feh_dict.keys():
zams_feh_dict = self.zams_redshift_feh_dict[z]
for feh in zams_feh_dict.keys():
zams_dict = zams_feh_dict[feh]
zams_z = zams_dict['redshift']
zams_feh = zams_dict['feh']
sample_option = f'z={zams_z:.3f}, [Fe/H]={zams_feh:.2f}'
sample_options.append((sample_option,
(zams_z, zams_feh)))
question = [
inquirer.List(
'choice',
message='Please pick a ZAMS sample to match to the COMPAS output',
choices=sample_options
)
]
sample_choice = inquirer.prompt(question)['choice']
return sample_choice
def _crossmatch_sample(self, zams_sample_redshift, zams_sample_feh):
zams_sample_dict = self.zams_redshift_feh_dict[zams_sample_redshift][zams_sample_feh]
zams_sample_path = zams_sample_dict['path']
self.logger.info(f'Now matching z={zams_sample_redshift}, [Fe/H]={zams_sample_feh} ZAMS sample ' \
f'at {zams_sample_path}.')
feh_match = self.compas_feh_options[np.argmin(np.abs(self.compas_feh_options - zams_sample_feh))]
compas_sample_path = self.compas_feh_dict[feh_match]
        # The file suffix and the log message record the matched COMPAS metallicity, feh_match.
        matched_sample_path = Path(COMPACT_OBJ_DIR_PATH,
                                   zams_sample_path.stem + f'_compasfeh={feh_match:.2f}.snappy.parquet')
        self.logger.info(f'Closest COMPAS sample: [Fe/H] = {feh_match} at {compas_sample_path}')
zams_df = pd.read_parquet(path=zams_sample_path,
engine='pyarrow',
use_threads=True)
sub_zams_dfs_array = np.array_split(zams_df, self.n_processes)
compas_sample_path_array = np.tile(compas_sample_path, self.n_processes)
feh_match_array = np.tile(feh_match, self.n_processes)
matched_df_list = []
with ProcessPoolExecutor(self.n_processes) as executor:
#for df in executor.map(crossmatch, sub_zams_dfs_array):
for df in executor.map(self._crossmatch_df, sub_zams_dfs_array, compas_sample_path_array, feh_match_array):
matched_df_list.append(df)
matched_df = pd.concat(matched_df_list)
matched_df.reset_index(inplace=True, drop=True)
del matched_df_list
gc.collect()
matched_m1s = np.unique(matched_df['Mass_ZAMS1_Found'])
for m1 in matched_m1s:
temp_df = matched_df[matched_df['Mass_ZAMS1_Found'] == m1]
temp_df.to_parquet(path=matched_sample_path,
engine='pyarrow',
compression='snappy',
partition_cols=['Mass_ZAMS1_Found',
'LogOrbitalPeriod_ZAMS'],
use_threads=True)
del temp_df
gc.collect()
del matched_df
gc.collect()
def _crossmatch_single_sample(self):
sample_choice = self._pick_sample()
self._crossmatch_sample(*sample_choice)
def _crossmatch_full_sample(self):
for z in self.zams_redshift_feh_dict.keys():
zams_feh_dict = self.zams_redshift_feh_dict[z]
for feh in zams_feh_dict.keys():
self._crossmatch_sample(z, feh)
def crossmatch_sample(self):
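        """Cross-match one or all ZAMS samples in IGIMF_ZAMS_DIR_PATH with the closest-metallicity COMPAS
        output, prompting interactively for the choice and for the number of parallel processes.
        """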
self.logger.info('Initializing...')
self._get_zams_samples()
self._get_compas_samples()
print('Please enter the number of parallel processes to run:')
self.n_processes = int(input())
choices = ['All', 'Single']
question = [
inquirer.List(
'choice',
message=f'Cross-match a single ZAMS sample or all ZAMS samples in {IGIMF_ZAMS_DIR_PATH}?',
choices=choices
)
]
choice = inquirer.prompt(question)['choice']
if choice == 'All':
self._crossmatch_full_sample()
else:
self._crossmatch_single_sample()
class MergerRates:
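    """Compute volumetric compact-binary merger rates from cross-matched samples.

    Loads the cross-matched samples in sample_dir_path, selects bound systems
    of a chosen merger class, bins them in merger age, weights each
    (z_ZAMS, [Fe/H]) sub-sample by its star-forming mass and by the chosen
    star formation rate density model, and integrates over [Fe/H] and z_ZAMS
    to obtain merger rates in yr-1 Gpc-3.

    A minimal usage sketch (paths are placeholders; zams_grid_path points to
    the pickled SFRD grid dataframe):

    >>> rates = MergerRates(Path('matched_samples'), Path('sfrd_grid.pkl'),
    ...                     sfrd_model='midmet')  # doctest: +SKIP
    >>> rates.load_sample_to_memory()  # doctest: +SKIP
    >>> rates.set_merger_df('NSNS')  # doctest: +SKIP
    """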
COLS_TO_LOAD_ESSENTIAL = [
'Unbound',
'Coalescence_Time',
'Mass_ZAMS1_Found',
'Mass_PostSN1',
'Mass_PostSN2',
'Binary_Type',
'SystemMass',
'CompanionNumber'
]
COLS_TO_LOAD_EXTENDED = [
'Mass_ZAMS2_Found',
'Mass_ZAMS3_Found',
'Mass_ZAMS4_Found',
'Mass_ZAMS5_Found',
'LogOrbitalPeriod_ZAMS',
'Eccentricity_ZAMS'
] + COLS_TO_LOAD_ESSENTIAL
CATEGORY_COLS = [
'Unbound',
'Binary_Type',
'CompanionNumber'
]
def __init__(self, sample_dir_path, zams_grid_path, sfrd_model, invariant_imf=False,
extended_load=False, load_bcos_only=False, min_redshift=0.0, max_redshift=10.0, min_feh=-5.0,
max_feh=0.5, progenitor_m_min=0.8, progenitor_m_max=150.0, convert_time=True, new_method=False,
parent_logger=None):
self.sample_dir_path = sample_dir_path
self.zams_grid_path = zams_grid_path
self.merger_class = None
self.sfrd_model = sfrd_model
self.invariant_imf = invariant_imf
self.canon_sfrd = invariant_imf
self.extended_load = extended_load
self.load_bcos_only = load_bcos_only
self.min_redshift = min_redshift
self.max_redshift = max_redshift
self.min_feh = min_feh
self.max_feh = max_feh
self.time_resolution = None
self.sample_progenitor_m_min = progenitor_m_min
self.sample_progenitor_m_max = progenitor_m_max
self._star_m_min = min(0.08, self.sample_progenitor_m_min)
self._star_m_max = max(150.0, self.sample_progenitor_m_max)
self._convert_time = convert_time
self._new_method = new_method
self.sfrd = self._get_sfrd()
self._cols_to_load = None
self._sample_paths = None
self.sample_properties_dict = dict()
self.sample_mtot_per_ncomp_dict = dict()
self.sample_starforming_mass_dict = dict()
self.sample_redshift_arr = None
self.ip_redshift_arr = None
self.sample_feh_arr = None
self.sample_redshift_feh_bins_dict = dict()
self.sample_df = None
self.merger_df = None
self._full_age_bin_edges = None
self._full_age_bin_widths = None
self._full_age_bin_centers = None
self._physical_age_bin_edges = None
self._physical_age_bin_widths = None
self._physical_age_bin_centers = None
self._dz_dfeh_mrates_dict = dict()
self._dz_dfeh_dage_mrates_arr = None
self._per_age_redshift_feh_fits = None
self._dz_dage_mrates_arr = None
self._dz_dpopage_mrates_arr = None
self._dpopage_interpolators = list()
self._ip_dz_dpopage_mrates_arr = None
self._ip_dz_dage_mrates_arr = None
self._per_age_redshift_fits = None
self.mrate_arr = None
self.logger = self._get_logger(parent_logger)
def _get_logger(self, parent_logger):
if parent_logger is None:
loggername = '.'.join([__name__, self.__class__.__name__])
log_path = Path(LOG_PATH, loggername, datetime.now().strftime('%d-%m-%Y_%H:%M:%S.log'))
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = create_logger(name=loggername, fpath=log_path, propagate=False)
else:
loggername = '.'.join([__name__, self.__class__.__name__])
logger = logging.getLogger(name=loggername)
logger.setLevel(logging.DEBUG)
return logger
@staticmethod
def _get_total_bco_mass(row):
"""Calculate the total binary compact object mass for a dataframe row."""
return row.Mass_PostSN1 + row.Mass_PostSN2
@staticmethod
def _get_chirp_mass(row):
"""Calculate the chirp mass for a dataframe row."""
m1 = row.Mass_PostSN1
m2 = row.Mass_PostSN2
if m1 == 0:
return 0
else:
return (m1 * m2) ** (3 / 5) / (m1 + m2) ** (1 / 5)
@staticmethod
def _get_merger_age(df):
"""Calculate Universe age at merger for a dataframe row."""
redshift_zams = df.Redshift_ZAMS.to_numpy()
age_zams = cosmo.age(redshift_zams).value
t_col = df.Coalescence_Time.to_numpy()
age_merger = age_zams + t_col
return age_merger
@staticmethod
def _get_redshift_from_age(age_arr):
"""Calculate redshift from Universe age."""
redshift_arr = newton(lambda z_arr: cosmo.age(z_arr).value - age_arr, np.zeros(len(age_arr)))
return redshift_arr
@staticmethod
def _get_bin_frequency_heights(x_arr, x_bin_edges):
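        """Count the elements of x_arr falling in each bin of x_bin_edges and divide each count by the
        bin width; values below the first edge are discarded.
        """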
        bin_frequencies = np.zeros(x_bin_edges.shape[0] - 1, np.float32)
        x_arr = x_arr[x_arr >= x_bin_edges[0]]  # discard values below the first bin edge
        for i, (edge0, edge1) in enumerate(zip(x_bin_edges[:-1], x_bin_edges[1:])):
            remaining_x_arr = x_arr[x_arr >= edge1]
            bin_count = len(x_arr) - len(remaining_x_arr)  # values falling in [edge0, edge1)
            x_arr = remaining_x_arr
            bin_frequencies[i] = bin_count / (edge1 - edge0)
        return bin_frequencies
return bin_frequencies
@staticmethod
def _get_linear_fit(xy0, xy1):
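        """Return [x0, x1, slope, intercept] of the straight line through points xy0 = (x0, y0) and xy1 = (x1, y1)."""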
x0, y0 = xy0
x1, y1 = xy1
slope = (y1 - y0) / (x1 - x0)
intercept = y0 - slope * x0
return np.array([x0, x1, slope, intercept])
@staticmethod
def _get_linear_fit_area(linear_fit):
x0, x1, slope, intercept = linear_fit
if x0 == x1:
return 0.0
else:
return slope * (x1 * x1 - x0 * x0) / 2 + intercept * (x1 - x0)
@property
def sfrd_model(self):
"""Which star formation rate density model from Chrulinska et al. (2020) to use."""
return self._sfrd_model
@sfrd_model.setter
def sfrd_model(self, model):
models = ['lowmet', 'midmet', 'highmet']
if model not in models:
raise ValueError(f'sfrd_model must be one of {models}.')
self._sfrd_model = model
@property
def canon_sfrd(self):
"""Whether the star formation rate grid to be loaded should assume a Canonical Kroupa IMF."""
return self._canon_sfrd
    @canon_sfrd.setter
    def canon_sfrd(self, invariant_imf):
        self._canon_sfrd = bool(invariant_imf)
@property
def cols_to_load(self):
if self._cols_to_load is None:
if self.extended_load:
self._cols_to_load = self.COLS_TO_LOAD_EXTENDED
else:
self._cols_to_load = self.COLS_TO_LOAD_ESSENTIAL
return self._cols_to_load
@property
def sample_paths(self):
if self._sample_paths is None:
self._sample_paths = list(self.sample_dir_path.glob('*.parquet'))
return self._sample_paths
def _get_sfrd(self):
sfrd = ChruslinskaSFRD(self.sfrd_model, self.canon_sfrd, per_redshift_met_bin=self._new_method)
sfrd.set_grid()
return sfrd
def _set_merger_class(self, class_):
classes = ['BHBH', 'BHNS', 'NSBH', 'NSNS']
if class_ not in classes:
raise ValueError(f'merger_class must be one of {classes}.')
elif class_ == 'NSBH' or class_ == 'BHNS':
class_ = ['BHNS', 'NSBH']
else:
class_ = [class_]
self.merger_class = class_
def _set_sample_properties_dict(self):
for path in self.sample_paths:
redshift, feh, logsfr, zoh1e4, mgal1e2, logn1e2, logpool, *_, compasfeh = path.stem.split('_')
redshift = np.float32(redshift.split('=')[1]) # z_{ZAMS}
feh = np.float32(feh.split('=')[1]) # [Fe/H]
logsfr = np.float32(logsfr.split('=')[1]) # log10(SFR)
zoh = np.float32(zoh1e4.split('=')[1]) / 1e4 # Z_{O/H}
mgal = np.float32(mgal1e2.split('=')[1]) / 1e2 # stellar mass of the corresponding galaxy
logn = np.float32(logn1e2.split('=')[1]) / 1e2 # log(number of stars in sample)
logpool = np.float32(logpool.split('=')[1]) # log(length of the original mass sampling pool)
compas_feh = np.float32(compasfeh.split('=')[1].rstrip('.snappy')) # closest [Fe/H] from the COMPAS samples
sample_properties = {
'redshift': redshift,
'feh': feh,
'compas_feh': compas_feh,
'zoh': zoh,
'logsfr': logsfr,
'mgal': mgal,
'logn': logn,
'logpool': logpool,
'path': path
}
if redshift not in self.sample_properties_dict:
self.sample_properties_dict[redshift] = dict()
self.sample_properties_dict[redshift][feh] = sample_properties
def _fix_df_dtypes(self, df):
for col in self.CATEGORY_COLS:
if df[col].dtype != 'category':
df[col] = df[col].astype('category')
for col in self.cols_to_load:
if col not in self.CATEGORY_COLS and df[col].dtype != 'float32':
df[col] = df[col].astype('float32')
return df
def _get_sample_df(self, redshift_batch):
sample_df = pd.DataFrame(columns=self.cols_to_load)
sample_n = sum([len(self.sample_properties_dict[redshift].keys()) for redshift in redshift_batch])
sample_counter = 1
for redshift in redshift_batch:
redshift_dict = self.sample_properties_dict[redshift]
for feh_ in redshift_dict:
sample_dict = redshift_dict[feh_]
z_zams = sample_dict['redshift']
feh = sample_dict['feh']
self.logger.info(f'Loading z={z_zams:.2f}, [Fe/H]={feh:.2f} ({sample_counter}/{sample_n}).')
if self.load_bcos_only:
filters = [('Binary_Type', 'in', ['BHBH', 'BHNS', 'NSBH', 'NSNS'])]
else:
filters = None
df = pd.read_parquet(
path=sample_dict['path'],
columns=self.cols_to_load,
filters=filters,
                    # Although loading only BCOs would be much faster and cheaper, for now we need to load
                    # all binary types in order to properly count the sample size when normalizing the IMF
                    # for computing the star-forming mass. In the future this information will be computed,
                    # and stored, while generating the initial sample.
engine='pyarrow',
use_threads=True
)
df['Redshift_ZAMS'] = [z_zams] * len(df)
df['FeH'] = [feh] * len(df)
                df = self._fix_df_dtypes(df)
sample_df = pd.concat((sample_df, df))
self.logger.info(f'Added {len(df):.2e} rows to sub-sample dataframe.')
del df
gc.collect()
sample_counter += 1
sample_df.reset_index(inplace=True, drop=True)
sample_df = self._fix_df_dtypes(sample_df)
return sample_df
def _set_sample_df(self, batches_n=1):
redshift_batches = np.array_split(np.array([*self.sample_properties_dict.keys()]), batches_n)
self.sample_df = pd.DataFrame(columns=self.cols_to_load)
self.sample_df = self._fix_df_dtypes(self.sample_df)
for redshift_batch in redshift_batches:
sample_subdf = self._get_sample_df(redshift_batch)
self.sample_df = pd.concat((self.sample_df, sample_subdf))
            self.logger.info(f'Added {len(sample_subdf):.2e}-row sub-sample dataframe to the sample dataframe.')
del sample_subdf
gc.collect()
sample_df_memory_usage_gb = self.sample_df.memory_usage(deep=True).sum() / (1024 ** 3)
self.logger.info(f'Done loading subsample to memory. Total memory usage from sample dataframe: '
f'{sample_df_memory_usage_gb:.2f} GB')
sleep(10)
self.sample_df.reset_index(inplace=True, drop=True)
#self.sample_df = self._fix_df_dtypes(self.sample_df)
self.logger.info(f'Done loading {len(self.sample_df):.2e} rows to sample dataframe.')
def load_sample_to_memory(self, batches_n=1):
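        """Load all cross-matched sample files into sample_df, splitting the sampled redshifts into
        batches_n loading batches.
        """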
        self.logger.info('Searching the sample folder for valid files...')
self._set_sample_properties_dict()
self.logger.info(f'Found {len(self.sample_paths)} valid files, loading to sample dataframe...')
self._set_sample_df(batches_n)
        sample_df_memory_usage_gb = self.sample_df.memory_usage(deep=True).sum() / (1024 ** 3)
        ncomp_summary = self.sample_df.CompanionNumber.value_counts()
        self.logger.info(f'Total memory usage from sample dataframe: {sample_df_memory_usage_gb:.2f} GB. '
                         f'Multiplicity summary: \n{ncomp_summary} \n'
                         f'Please check this is the correct sample before proceeding.')
def get_mtot_per_ncomp(self):
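        """Sum the total system mass per companion number for each (redshift, [Fe/H]) sub-sample."""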
self.logger.info('Getting raw sample star-forming masses...')
sample_n = len(self.sample_paths)
sample_counter = 1
for redshift in self.sample_properties_dict:
if redshift not in self.sample_mtot_per_ncomp_dict:
self.sample_mtot_per_ncomp_dict[redshift] = dict()
redshift_dict = self.sample_mtot_per_ncomp_dict[redshift]
for feh in self.sample_properties_dict[redshift]:
self.logger.info(f'Working for z={redshift:.2f}, [Fe/H]={feh:.2f} sample ({sample_counter}/{sample_n})')
if feh not in redshift_dict:
redshift_dict[feh] = dict()
feh_dict = redshift_dict[feh]
                for ncp in self.sample_df.CompanionNumber.unique():
                    # np.logical_and takes only two array arguments, so reduce combines the three masks.
                    ncp_df = self.sample_df[np.logical_and.reduce((self.sample_df.Redshift_ZAMS == redshift,
                                                                   self.sample_df.FeH == feh,
                                                                   self.sample_df.CompanionNumber == ncp))]
sf_mass = ncp_df.SystemMass.sum()
feh_dict[ncp] = sf_mass
del ncp_df
gc.collect()
sample_counter += 1
@staticmethod
def _kroupa_integral(k1, k2, m0, m1, variable='number'):
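        """Integrate a two-part Kroupa power law between masses m0 and m1.

        k1 and k2 are the normalization constants below and above the 0.5 Msun
        break. With variable='number' the integrand is dN/dm; with
        variable='mass' it is m*dN/dm.
        """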
        a1 = 1.3
        a2 = 2.3
        if variable == 'mass':
            # Integrating m * dN/dm lowers each power-law exponent by one.
            a1 -= 1
            a2 -= 1
        if m0 < 0.5:
            if m1 < 0.5:
                integral = k1 / (1 - a1) * (m1 ** (1 - a1) - m0 ** (1 - a1))
            else:
                integral = k1 / (1 - a1) * (0.5 ** (1 - a1) - m0 ** (1 - a1))
                integral += k2 / (1 - a2) * (m1 ** (1 - a2) - 0.5 ** (1 - a2))
        else:
            integral = k2 / (1 - a2) * (m1 ** (1 - a2) - m0 ** (1 - a2))
        return integral
def set_merger_df(self, merger_class):
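        """Build merger_df from sample_df: bound systems of the requested merger class that merge within
        the age of the Universe, with merger age and merger redshift columns added.
        """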
self._set_merger_class(merger_class)
#pandarallel.initialize(progress_bar=False)
        self.merger_df = self.sample_df[(self.sample_df.Unbound == 0) &
                                        (self.sample_df.Binary_Type.isin(self.merger_class))].copy()
        # remove systems that would not have merged by now no matter how old the progenitors
        self.merger_df['Coalescence_Time'] = self.merger_df['Coalescence_Time'] / 1e9  # yr -> Gyr
        self.merger_df = self.merger_df[self.merger_df.Coalescence_Time <= cosmo.age(0).value]
# set up the rest of the dataframe
#self.merger_df['Chirp_Mass'] = self.merger_df.parallel_apply(self._get_chirp_mass, axis=1)
#self.merger_df['Total_Mass'] = self.merger_df.parallel_apply(self._get_total_bco_mass, axis=1)
self.merger_df['Age_Merger'] = self._get_merger_age(self.merger_df)
self.merger_df['Redshift_Merger'] = self._get_redshift_from_age(self.merger_df.Age_Merger.to_numpy())
def _set_bins(self, time_resolution=0.1):
self.time_resolution = time_resolution
self._full_age_bin_edges = np.arange(cosmo.age(self.max_redshift).value,
self.merger_df.Age_Merger.max() + self.time_resolution,
self.time_resolution) # Gyr
self._full_age_bin_widths = np.tile([self.time_resolution], self._full_age_bin_edges.shape[0] - 1)
self._full_age_bin_centers = np.array([(age0+age1)/2 for age0, age1 in zip(self._full_age_bin_edges[:-1],
self._full_age_bin_edges[1:])])
self._physical_age_bin_edges = self._full_age_bin_edges[self._full_age_bin_edges < cosmo.age(0).value]
self._physical_age_bin_widths = np.tile([self.time_resolution], self._physical_age_bin_edges.shape[0] - 1)
self._physical_age_bin_centers = np.array([
(age0+age1)/2 for age0, age1 in zip(self._physical_age_bin_edges[:-1], self._physical_age_bin_edges[1:])
])
self._full_redshift_bin_edges = self._get_redshift_from_age(self._full_age_bin_edges)
#self._full_redshift_bin_edges = np.array([self._get_redshift_from_age(age) for age in self._full_age_bin_edges])
self._physical_redshift_bin_edges = self._full_redshift_bin_edges[self._full_redshift_bin_edges >= 0.0]
sample_redshifts = list()
sample_fehs = list()
for redshift in self.sample_properties_dict:
sample_redshifts.append(redshift)
subsample_fehs = list()
for feh in self.sample_properties_dict[redshift]:
subsample_fehs.append(feh)
subsample_fehs = np.sort(subsample_fehs)
sample_fehs.append(subsample_fehs)
redshift_sorted_indices = np.argsort(sample_redshifts)
self.sample_redshift_arr = np.array(sample_redshifts)[redshift_sorted_indices]
self.sample_feh_arr = np.array(sample_fehs)[redshift_sorted_indices]
sfrd_grid_df = pd.read_pickle(self.zams_grid_path)
sfrd_grid_df = sfrd_grid_df[['Redshift_ZAMS', 'FeH', 'Redshift_Bin_Edges', 'ZOH_Bin_Edges']]
for redshift in sfrd_grid_df.Redshift_ZAMS.unique():
df = sfrd_grid_df[sfrd_grid_df.Redshift_ZAMS == redshift]
sample_redshift_i = np.argmin(np.abs(self.sample_redshift_arr - redshift))
sample_redshift = self.sample_redshift_arr[sample_redshift_i]
sample_fehs = self.sample_feh_arr[sample_redshift_i]
for feh in df.FeH:
row = df[df.FeH == feh].iloc[0]
sample_feh_i = np.argmin(np.abs(sample_fehs - feh))
sample_feh = sample_fehs[sample_feh_i]
if sample_redshift not in self.sample_redshift_feh_bins_dict:
self.sample_redshift_feh_bins_dict[sample_redshift] = dict()
self.sample_redshift_feh_bins_dict[sample_redshift]['redshift_bin_edges'] = row.Redshift_Bin_Edges
redshift_dict = self.sample_redshift_feh_bins_dict[sample_redshift]
feh_bin_edges = (ZOH_to_FeH(row.ZOH_Bin_Edges[0]), ZOH_to_FeH(row.ZOH_Bin_Edges[1]))
redshift_dict[sample_feh] = feh_bin_edges
min_sfrd_redshift = min(list(self.sample_redshift_feh_bins_dict.keys()))
if self._get_redshift_from_age(self._physical_age_bin_centers[[-1]])[0] < min_sfrd_redshift:
new_limit = cosmo.age(min_sfrd_redshift).value
self._physical_age_bin_centers[-1] = new_limit
def _set_dz_dfeh_dage_mrates_arr(self):
"""Get the volumetric merger rate per z_zams and [Fe/H] bin, in yr-1 Gpc-3."""
self._dz_dfeh_dage_mrates_arr = np.zeros((*self.sample_feh_arr.shape, self._full_age_bin_centers.shape[0]),
np.float32)
for i_redshift_zams, redshift_zams in enumerate(self.sample_redshift_arr):
# self._dz_dfeh_mrates_dict[redshift_zams] = dict()
for i_feh, feh in enumerate(self.sample_feh_arr[i_redshift_zams]):
starforming_mass = self.sample_starforming_mass_dict[redshift_zams][feh] # (Mo)
dz_dfeh_merger_df = self.merger_df[(self.merger_df.Redshift_ZAMS == redshift_zams) &
(self.merger_df.FeH == feh)]
dz_dfeh_merger_age_arr = np.sort(dz_dfeh_merger_df.Age_Merger.to_numpy()) # (Gyr)
age_bin_heights = self._get_bin_frequency_heights(x_arr=dz_dfeh_merger_age_arr, # dN/dt (Gyr-1)
x_bin_edges=self._full_age_bin_edges)
age_bin_heights /= starforming_mass # dN/dt dMsf (Gyr-1 Mo-1)
age_bin_heights *= 1e9 # dN/dt dMsf (yr-1 Mo-1)
                log_sfrd = self.sfrd.get_logsfrd(feh, redshift_zams)  # log10(dMsf/dVc dz_zams dFeH) (log10(Mo Mpc-3))
                age_bin_sfmass_density = 10 ** log_sfrd / 1e9  # dMsf/dVc dz_zams dFeH (Mo Gpc-3)
if self._convert_time:
# convert time from the source (in which the SFR is measured) to the observer frame (in which the
# merger rate is measured), dt_s/dt_o = 1 / 1+z, and SFRD = dM_sf/dt_s dVc
age_bin_heights /= 1 + redshift_zams
#age_bin_sfmass_densities = sfrd * self._full_age_bin_widths # dMsf/dVc dz_zams dFeH (Mo Gpc-3)
age_bin_heights *= age_bin_sfmass_density # dN/dt dVc dz_zams dFeH (yr-1 Gpc-3)
if not self._new_method:
delta_z_zams = np.abs(self.sample_redshift_feh_bins_dict[redshift_zams]['redshift_bin_edges'][0] -
self.sample_redshift_feh_bins_dict[redshift_zams]['redshift_bin_edges'][1])
delta_feh = np.abs(self.sample_redshift_feh_bins_dict[redshift_zams][feh][0] -
self.sample_redshift_feh_bins_dict[redshift_zams][feh][1])
age_bin_heights /= delta_z_zams * delta_feh # dN/dt dVc delta_z_zams delta FeH (yr-1 Gpc-3)
# self._dz_dfeh_mrates_dict[redshift_zams][feh] = age_bin_heights
self._dz_dfeh_dage_mrates_arr[i_redshift_zams, i_feh] = age_bin_heights
def _get_feh_fits(self):
# We reorder the array columns to the order we must access them in for fitting over [Fe/H].
dage_dz_dfeh_mrates_arr = np.transpose(self._dz_dfeh_dage_mrates_arr, (2, 0, 1))
max_fehs_per_redshift = max([arr.shape[0] for arr in self.sample_feh_arr])
per_age_redshift_feh_fits = np.zeros((self._full_age_bin_widths.shape[0],
self.sample_redshift_arr.shape[0],
max_fehs_per_redshift - 1 + 2,
4),
np.float32)
for i_age, dz_dfeh_mrates_arr in enumerate(dage_dz_dfeh_mrates_arr):
for i_redshift, dfeh_mrates_arr in enumerate(dz_dfeh_mrates_arr):
feh_mrate0 = [self.min_feh,
0.0]
for i_feh, mrate1 in enumerate(dfeh_mrates_arr):
feh_mrate1 = [self.sample_feh_arr[i_redshift, i_feh],
mrate1]
per_age_redshift_feh_fits[i_age, i_redshift, i_feh] = self._get_linear_fit(feh_mrate0, feh_mrate1)
feh_mrate0 = feh_mrate1
feh_mrate1 = [self.max_feh,
0.0]
per_age_redshift_feh_fits[i_age, i_redshift, i_feh+1] = self._get_linear_fit(feh_mrate0, feh_mrate1)
return per_age_redshift_feh_fits
def _get_boundary_fit_limit(self, fit, variable, boundary):
x0, x1, slope, intercept = fit
if variable == 'feh':
min_var = self.min_feh
max_var = self.max_feh
elif variable == 'redshift':
min_var = self.min_redshift
max_var = self.max_redshift
else:
raise ValueError(f'Variable should be "feh" or "redshift", but {variable} was passed.')
if slope != 0:
x_intercept = -intercept / slope
else:
x_intercept = None
if boundary == 'lower':
if x_intercept is None or slope < 0:
var_mrate0 = (min_var, 0)
var_mrate1 = (x0, slope * x0 + intercept)
boundary_fit = self._get_linear_fit(var_mrate0, var_mrate1)
fixed_fit_edge = x0
else:
boundary_fit = (0, 0, 0, 0)
fixed_fit_edge = max([min_var, x_intercept])
elif boundary == 'upper':
if x_intercept is None or slope > 0:
var_mrate0 = (x1, slope * x1 + intercept)
var_mrate1 = (max_var, 0)
boundary_fit = self._get_linear_fit(var_mrate0, var_mrate1)
fixed_fit_edge = x1
else:
boundary_fit = (0, 0, 0, 0)
fixed_fit_edge = min([max_var, x_intercept])
else:
raise ValueError(f'Boundary parameter must be one of ["lower","upper"], not {boundary}.')
return boundary_fit, fixed_fit_edge
def _set_dz_dage_mrates_arr(self):
"""Get the volumetric merger rate per z_zams bin, integrated over [Fe/H], in yr-1 Gpc-3."""
self._per_age_redshift_feh_fits = self._get_feh_fits()
self._dz_dage_mrates_arr = np.zeros((self._per_age_redshift_feh_fits.shape[1],
self._per_age_redshift_feh_fits.shape[0]),
np.float32)
for i_age, per_redshift_feh_fits in enumerate(self._per_age_redshift_feh_fits):
for i_redshift, per_feh_fits in enumerate(per_redshift_feh_fits):
per_redshift_age_mrate = 0
for i_feh, fit in enumerate(per_feh_fits):
x0, x1, slope, intercept = fit
if i_feh == 0:
boundary_fit, x0 = self._get_boundary_fit_limit(fit, boundary='lower', variable='feh')
elif i_feh == per_feh_fits.shape[0] - 1:
boundary_fit, x1 = self._get_boundary_fit_limit(fit, boundary='upper', variable='feh')
else:
boundary_fit = np.zeros(4)
mrate = self._get_linear_fit_area((x0, x1, slope, intercept))
boundary_mrate = self._get_linear_fit_area(boundary_fit)
per_redshift_age_mrate += mrate + boundary_mrate
self._dz_dage_mrates_arr[i_redshift, i_age] = per_redshift_age_mrate
def _set_dz_dpopage_mrates_arr(self):
self._dz_dpopage_mrates_arr = np.zeros(self._dz_dage_mrates_arr.shape, np.float32)
for i_redshift, dage_mrates_arr in enumerate(self._dz_dage_mrates_arr):
start_i = next((i for i, x in enumerate(dage_mrates_arr) if x), -1)
nonzero_dage_mrates_arr = dage_mrates_arr[start_i:]
dpopage_mrates_arr = np.concatenate((
nonzero_dage_mrates_arr,
np.zeros(self._full_age_bin_centers.shape[0]-nonzero_dage_mrates_arr.shape[0], np.float32)
))
self._dz_dpopage_mrates_arr[i_redshift] = dpopage_mrates_arr
def _set_ip_dz_dpopage_mrates_arr(self):
self.ip_redshift_arr = self._get_redshift_from_age(self._physical_age_bin_centers)
#self.ip_redshift_arr = np.array([self._get_redshift_from_age(age) for age in self._physical_age_bin_centers])
self._ip_dz_dpopage_mrates_arr = np.zeros([self.ip_redshift_arr.shape[0], self._dz_dpopage_mrates_arr.shape[1]])
dpopage_dz_mrates_arr = self._dz_dpopage_mrates_arr.T
ip_dpopage_dz_mrates_arr = self._ip_dz_dpopage_mrates_arr.T
for i_age, dz_mrates_arr in enumerate(dpopage_dz_mrates_arr):
interpolator = interp1d(self.sample_redshift_arr, dz_mrates_arr, kind='linear')
ip_dpopage_dz_mrates_arr[i_age] = interpolator(self.ip_redshift_arr)
self._ip_dz_dpopage_mrates_arr = ip_dpopage_dz_mrates_arr.T
def _set_ip_dz_dage_mrates_arr(self):
self._ip_dz_dage_mrates_arr = np.zeros(self._ip_dz_dpopage_mrates_arr.shape, np.float32)
for ip_redshift_i, dpopage_mrates_arr in enumerate(self._ip_dz_dpopage_mrates_arr):
redshift = self.ip_redshift_arr[ip_redshift_i]
first_physical_redshift_bin_edge_i = next((i for i, z in enumerate(self._full_redshift_bin_edges)
if z < redshift)) - 1
if first_physical_redshift_bin_edge_i == 0:
last_physical_age_bin_edge_i = None
else:
last_physical_age_bin_edge_i = -first_physical_redshift_bin_edge_i
dage_mrates_arr = dpopage_mrates_arr[:last_physical_age_bin_edge_i]
dage_mrates_arr = np.concatenate((np.zeros(self._full_age_bin_centers.shape[0] - dage_mrates_arr.shape[0]),
dage_mrates_arr))
self._ip_dz_dage_mrates_arr[ip_redshift_i] = dage_mrates_arr
def _get_z_fits(self):
ip_dage_dz_mrates_arr = self._ip_dz_dage_mrates_arr.T
per_age_redshift_fits = np.zeros((self._physical_age_bin_centers.shape[0],
self.ip_redshift_arr.shape[0] -1 + 2,
4),
np.float32)
for i_age, dz_mrates_arr in enumerate(ip_dage_dz_mrates_arr):
if i_age == self._physical_age_bin_centers.shape[0]:
break
z_mrate0 = [self.max_redshift,
0]
for i_z, mrate1 in enumerate(dz_mrates_arr):
if i_z == self.ip_redshift_arr.shape[0]:
break
z_mrate1 = [self.ip_redshift_arr[i_z],
mrate1]
per_age_redshift_fits[i_age, i_z] = self._get_linear_fit(z_mrate0, z_mrate1)
z_mrate0 = z_mrate1
z_mrate1 = [self.min_redshift,
0]
per_age_redshift_fits[i_age, i_z+1] = self._get_linear_fit(z_mrate0, z_mrate1)
return per_age_redshift_fits
def _set_dage_mrates_arr(self):
"Get the volumetric merger rate, integrated over z_zams and [Fe/H], in yr-1 Gpc-3."
self._per_age_redshift_fits = self._get_z_fits()
dage_mrates_arr = np.zeros((self._per_age_redshift_fits.shape[0]), np.float32)
for i_age, per_redshift_fits in enumerate(self._per_age_redshift_fits):
per_age_mrate = 0
for i_z, fit in enumerate(per_redshift_fits):
x1, x0, slope, intercept = fit
fit = (x0, x1, slope, intercept)
if i_z == 0:
boundary_fit, x1 = self._get_boundary_fit_limit(fit, boundary='upper', variable='redshift')
elif i_z == per_redshift_fits.shape[0] - 1:
boundary_fit, x0 = self._get_boundary_fit_limit(fit, boundary='lower', variable='redshift')
else:
boundary_fit = np.zeros(4)
mrate = self._get_linear_fit_area((x0, x1, slope, intercept))
boundary_mrate = self._get_linear_fit_area(boundary_fit)
per_age_mrate += mrate + boundary_mrate
dage_mrates_arr[i_age] = per_age_mrate
self.mrate_arr = dage_mrates_arr
def delete_sample_df_from_memory(self):
self.sample_df = None
gc.collect()