"""Post-process COMPAS output into compact-binary populations and merger rates.

This module provides the three stages of post-processing: trimming the raw
COMPAS HDF5 output down to parquet tables of the columns of interest
(:class:`COMPASOutputTrimmer`), cross-matching generated ZAMS populations
with the trimmed output (:class:`CompactBinaryPopulation`), and computing
volumetric merger rates from the cross-matched samples and a star formation
rate density grid (:class:`MergerRates`).
"""
import logging
import warnings
import gc
import fnmatch
from concurrent.futures import ProcessPoolExecutor
from time import time, sleep
from datetime import datetime
from pathlib import Path
import inquirer
import numpy as np
import tables as tb
import pandas as pd
#from pandarallel import pandarallel # pandarallel breaks float types
import astropy.constants as ct
import astropy.units as u
from astropy.cosmology import WMAP9 as cosmo
from scipy.integrate import quad
from scipy.optimize import newton
from scipy.interpolate import interp1d
from bossa.sfh import ChruslinskaSFRD
from bossa.imf import Star, IGIMF
from bossa.zams import ZAMSSystemGenerator
from bossa.utils import pull_snmass1, pull_snmass2, chirp_mass, bintype, mass_ratio, create_logger, \
ZOH_to_FeH, format_time
from bossa.constants import LOG_PATH, COMPAS_PROC_OUTPUT_DIR_PATH, COMPAS_WORK_PATH, \
BINARIES_CORRELATED_TABLE_PATH, COMPAS_21XX_PROC_OUTPUT_DIR_PATH, \
COMPAS_12XX_PROC_OUTPUT_DIR_PATH, COMPAS_21XX_GRIDS_PATH, COMPAS_12XX_GRIDS_PATH, \
IGIMF_ZAMS_DIR_PATH, COMPACT_OBJ_DIR_PATH
CO_CODES = [10, 11, 12, 13, 14] # HeWD, COWD, ONeMgWD, NS, BH
PARQUET_SETTINGS = {'engine': 'pyarrow',
'compression': 'snappy',
'partition_cols': ['Mass_ZAMS1', 'LogP_ZAMS'],
'use_threads': True}
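# The trimmed tables are written as a single parquet dataset partitioned on (Mass_ZAMS1, LogP_ZAMS), so
# that the cross-matching step below can cheaply read one (m1, logP) leaf at a time with pyarrow filters.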
MODULE_NAME = __name__
class COMPASOutputTrimmer:
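    """Trim raw COMPAS HDF5 output into a partitioned parquet table.

    Loads every COMPAS_Output*.h5 file in the chosen sample folder, keeps only
    the columns listed in the *_TOLOAD class attributes, computes derived
    columns (chirp mass, total masses, mass ratios, log orbital period, binary
    type) and appends the result to a snappy-compressed parquet dataset
    partitioned on Mass_ZAMS1 and LogP_ZAMS.

    A minimal usage sketch (both option paths below are placeholders):

    >>> trimmer = COMPASOutputTrimmer(Path('output_option'), Path('sample_option'),
    ...                               only_load_dcos=True)  # doctest: +SKIP
    >>> trimmer.trim()  # doctest: +SKIP
    """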
SNTAB_BASE_COLS_TOLOAD = [
'Mass(SN)',
'Mass(CP)',
'Unbound'
]
SNTAB_PSR_COLS_TOLOAD = [
'Mass_Core@CO(SN)',
'Mass_He_Core@CO(SN)',
'Mass_CO_Core@CO(SN)',
'Fallback_Fraction(SN)',
'Kick_Magnitude(uK)',
'SystemicSpeed',
'Time'
]
DCOTAB_COLS_TOLOAD = [
'Mass(1)',
'Mass(2)',
'Coalescence_Time'
]
SYSPARAMTAB_COLS_TOLOAD = [
'Initial_Mass(1)',
'Initial_Mass(2)',
'Mass_Ratio',
'Stellar_Type(1)',
'Stellar_Type(2)',
'Metallicity@ZAMS(1)',
'SemiMajorAxis@ZAMS',
'Eccentricity@ZAMS'
]
PSRTAB_COLS_TOLOAD = [
'MT_History',
'Mass(1)',
'Mass(2)',
'Pulsar_Mag_Field(1)',
'Pulsar_Mag_Field(2)',
'Pulsar_Spin_Down(1)',
'Pulsar_Spin_Down(2)',
'Pulsar_Spin_Freq(1)',
'Pulsar_Spin_Freq(2)',
'SemiMajorAxis',
'Stellar_Type(1)',
'Stellar_Type(2)',
'Time',
'dT'
]
BASE_COLS_TOSAVE = [
'Mass_ZAMS1',
'Mass_ZAMS2',
'Stellar_Type1',
'Stellar_Type2',
'Metallicity_ZAMS',
'SemiMajorAxis_ZAMS',
'Eccentricity_ZAMS',
'Mass_PostSN1',
'Mass_PostSN2',
'Coalescence_Time',
'SEED',
'Chirp_Mass',
'Total_Mass_PostSN',
'Total_Mass_ZAMS',
'Mass_Ratio_PostSN',
'Mass_Ratio_ZAMS',
'LogP_ZAMS',
'Binary_Type'
]
PSR_SN_DOUBLE_COLS_TOSAVE = [
'PSR_SN_Mass_Core1',
'PSR_SN_Mass_Core2',
'PSR_SN_Mass_He_Core1',
'PSR_SN_Mass_He_Core2',
'PSR_SN_Mass_CO_Core1',
'PSR_SN_Mass_CO_Core2',
'PSR_SN_Fallback_Fraction1',
'PSR_SN_Fallback_Fraction2',
'PSR_SN_Kick_Magnitude1',
'PSR_SN_Kick_Magnitude2',
'PSR_SN_SystemicSpeed1',
'PSR_SN_SystemicSpeed2',
]
PSR_DO_COLS_TOSAVE = [
'PSR_DO_PreSN_Time',
'PSR_DO_PreSN_Omega1',
'PSR_DO_PreSN_Omega2',
'PSR_DO_PreSN_Radius1',
'PSR_DO_PreSN_Radius2'
]
PSR_COLS_TOSAVE = BASE_COLS_TOSAVE + [
'PSR_MT_History',
'PSR_Mass1',
'PSR_Mass2',
'PSR_B1',
'PSR_B2',
'PSR_Pdot1',
'PSR_Pdot2',
'PSR_Omega1',
'PSR_Omega2',
'PSR_SemiMajorAxis',
'PSR_Type1',
'PSR_Type2',
'PSR_Time',
'PSR_dT',
] + PSR_SN_DOUBLE_COLS_TOSAVE
def __init__(self, compas_output_option, compas_sample_option, only_load_dcos=True, load_unbound_systems=False,
save_pulsar_columns=False, parent_logger=None):
self.compas_output_path = Path(COMPAS_WORK_PATH, compas_output_option, compas_sample_option)
self.trimmed_output_path = Path(COMPAS_PROC_OUTPUT_DIR_PATH, compas_output_option.name + '.snappy.parquet')
self.input_paths = list()
self.only_load_dcos = only_load_dcos
self.load_unbound_systems = load_unbound_systems
self._save_pulsar_columns = self._set_save_pulsar_columns(save_pulsar_columns)
self._columns_to_load = None
self._columns_to_save = None
self._float32_cols = None
self.logger = self._get_logger(parent_logger)
@property
def columns_to_save(self):
if self._columns_to_save is None:
if self._save_pulsar_columns:
self._columns_to_save = self.PSR_COLS_TOSAVE
else:
self._columns_to_save = self.BASE_COLS_TOSAVE
return self._columns_to_save
@property
def float32_cols(self):
if self._float32_cols is None:
self._float32_cols = list(col for col in self.columns_to_save if col not in [
'Stellar_Type1',
'Stellar_Type2',
'SEED',
'Binary_Type',
'PSR_MT_History',
'PSR_Type1',
'PSR_Type2'
])
return self._float32_cols
@property
def columns_to_load(self):
if self._columns_to_load is None:
if self.only_load_dcos:
self._columns_to_load = self.SYSPARAMTAB_COLS_TOLOAD + self.DCOTAB_COLS_TOLOAD
elif self._save_pulsar_columns:
self._columns_to_load = (self.SYSPARAMTAB_COLS_TOLOAD + self.DCOTAB_COLS_TOLOAD +
self.SNTAB_BASE_COLS_TOLOAD + self.SNTAB_PSR_COLS_TOLOAD +
self.PSRTAB_COLS_TOLOAD)
self._columns_to_load = list(set(self._columns_to_load))
else:
self._columns_to_load = (self.SYSPARAMTAB_COLS_TOLOAD + self.DCOTAB_COLS_TOLOAD +
self.SNTAB_BASE_COLS_TOLOAD)
return self._columns_to_load
def _set_save_pulsar_columns(self, save_pulsar_columns):
if save_pulsar_columns is True and self.only_load_dcos is True:
raise ValueError('save_pulsar_columns was passed as True but only_load_dcos is also True.')
else:
return save_pulsar_columns
def _get_logger(self, parent_logger):
if parent_logger is None:
loggername = '.'.join([__name__, self.__class__.__name__])
log_path = Path(LOG_PATH, loggername, datetime.now().strftime('%d-%m-%Y_%H:%M:%S.log'))
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = create_logger(name=loggername, fpath=log_path, propagate=False)
else:
loggername = '.'.join([__name__, self.__class__.__name__])
logger = logging.getLogger(name=loggername)
logger.setLevel(logging.DEBUG)
return logger
def _set_paths(self):
self.input_paths = list(self.compas_output_path.glob('COMPAS_Output*.h5'))
def _get_psr_sn_double_cols_dict(self, no=1):
no = str(no)
col_dict = {
'Mass_Core@CO(SN)': f'PSR_SN_Mass_Core{no}',
'Mass_He_Core@CO(SN)': f'PSR_SN_Mass_He_Core{no}',
'Mass_CO_Core@CO(SN)': f'PSR_SN_Mass_CO_Core{no}',
}
return col_dict
def _get_df_from_hdf(self, hdf_path):
self.logger.debug(f'Attempting to load {hdf_path}')
self.logger.debug(f'Columns to load: {self.columns_to_load}')
empty = False
try:
table = tb.open_file(hdf_path, 'r')
except tb.HDF5ExtError:
empty = True
if empty:
self.logger.error(f'File {hdf_path} empty')
            return None, None
else:
try:
sysparam_tab = table.root.BSE_System_Parameters
except tb.NoSuchNodeError:
self.logger.error(f'No BSE_System_Parameters table in {hdf_path}.')
print(f'No BSE_System_Parameters table in {hdf_path}.')
                table.close()
                return None, None
else:
self.logger.debug(f'Now loading {hdf_path}.')
start_time = time()
sort_sysparam_tab = np.argsort(sysparam_tab.SEED)
full_seed_arr = np.array(sysparam_tab.SEED)[sort_sysparam_tab]
samplesize = len(full_seed_arr)
sample_metallicity = sysparam_tab['Metallicity@ZAMS(1)'][0]
sample_m1 = sysparam_tab['Initial_Mass(1)'][0]
self.logger.info(f'Table is valid, with M1 = {sample_m1} and Z = {sample_metallicity}.')
if self.only_load_dcos:
dcos_present = True
sns_present = True
psrs_present = True
try:
dco_tab = table.root.BSE_Double_Compact_Objects
except tb.NoSuchNodeError:
dcos_present = False
sns_present = False
psrs_present = False
self.logger.info('No DCOs produced.')
else:
sort_dco_tab = np.argsort(dco_tab.SEED)
dco_seed_arr = np.array(dco_tab.SEED)[sort_dco_tab]
sn_tab = table.root.BSE_Supernovae
sort_sn_tab = np.lexsort((sn_tab.Time, sn_tab.SEED))
sn_full_seed_arr = np.array(sn_tab.SEED)[sort_sn_tab]
if self.load_unbound_systems:
sn_partial_seed_arr = sn_full_seed_arr
else:
sn_bound_mask = np.invert(np.array(sn_tab.Unbound)[sort_sn_tab].astype(bool))
sn_partial_seed_arr = sn_full_seed_arr[sn_bound_mask]
sn_bco_mask = np.isin(sn_partial_seed_arr, dco_seed_arr)
sn_seed_arr = sn_partial_seed_arr[sn_bco_mask]
first_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='left') + 1)
second_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='right') - 1)
if self._save_pulsar_columns:
try:
psr_tab = table.root.BSE_Pulsar_Evolution
except tb.NoSuchNodeError:
psrs_present = False
self.logger.info('No PSRs formed.')
else:
sort_psr_tab = np.lexsort((psr_tab.Time, psr_tab.SEED))
psr_partial_seed_arr = np.array(psr_tab.SEED)[sort_psr_tab]
psr_bco_mask = np.isin(psr_partial_seed_arr, dco_seed_arr)
psr_seed_arr = psr_partial_seed_arr[psr_bco_mask]
else:
dcos_present = True
sns_present = True
psrs_present = True
try:
dco_tab = table.root.BSE_Double_Compact_Objects
except tb.NoSuchNodeError:
dcos_present = False
self.logger.info('No DCOs produced.')
else:
sort_dco_tab = np.argsort(dco_tab.SEED)
dco_seed_arr = np.array(dco_tab.SEED)[sort_dco_tab]
sn_tab = table.root.BSE_Supernovae
sort_sn_tab = np.lexsort((sn_tab.Time, sn_tab.SEED))
sn_full_seed_arr = np.array(sn_tab.SEED)[sort_sn_tab]
if self.load_unbound_systems:
sn_seed_arr = sn_full_seed_arr
else:
sn_bound_mask = np.invert(np.array(sn_tab.Unbound)[sort_sn_tab].astype(bool))
sn_seed_arr = sn_full_seed_arr[sn_bound_mask]
first_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='left') + 1)
second_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='right') - 1)
if not dcos_present:
try:
sn_tab = table.root.BSE_Supernovae
except tb.NoSuchNodeError:
sns_present = False
psrs_present = False
self.logger.info('No SNs occurred.')
else:
sort_sn_tab = np.lexsort((sn_tab.Time, sn_tab.SEED))
sn_full_seed_arr = np.array(sn_tab.SEED)[sort_sn_tab]
if self.load_unbound_systems:
sn_seed_arr = sn_full_seed_arr
else:
sn_bound_mask = np.invert(np.array(sn_tab.Unbound)[sort_sn_tab].astype(bool))
sn_seed_arr = sn_full_seed_arr[sn_bound_mask]
first_sn_valid_indices = np.unique(np.searchsorted(sn_full_seed_arr,
sn_seed_arr,
side='right') - 1)
if sns_present and self._save_pulsar_columns:
try:
psr_tab = table.root.BSE_Pulsar_Evolution
except tb.NoSuchNodeError:
psrs_present = False
self.logger.info('No PSRs formed.')
else:
#sort_psr_tab = np.lexsort((psr_tab.Time, psr_tab.SEED))
sort_psr_tab = np.argsort(psr_tab.SEED)
psr_seed_arr = np.array(psr_tab.SEED)[sort_psr_tab]
df = pd.DataFrame(columns=self.columns_to_load, dtype=np.float32)
if self._save_pulsar_columns:
psr_df = pd.DataFrame(columns=self.columns_to_load, dtype=np.float32)
else:
psr_df = None
self.logger.debug('Loading System Parameters table...')
for col in self.SYSPARAMTAB_COLS_TOLOAD:
df[col] = np.array(sysparam_tab[col], np.float32)[sort_sysparam_tab]
if dcos_present:
self.logger.debug('Loading Double Compact Objects table...')
fullsample_indices = np.searchsorted(full_seed_arr, dco_seed_arr)
for col in self.DCOTAB_COLS_TOLOAD:
col_arr = np.zeros(samplesize, np.float32)
col_arr[fullsample_indices] = np.array(dco_tab[col])[sort_dco_tab]
df[col] = col_arr
else:
for col in self.DCOTAB_COLS_TOLOAD:
df[col] = np.zeros(samplesize, np.float32)
if sns_present:
self.logger.debug('Loading Supernovae table...')
fullsample_indices = np.searchsorted(full_seed_arr, sn_seed_arr[first_sn_valid_indices])
for col in self.SNTAB_BASE_COLS_TOLOAD:
if col == 'Unbound':
col_arr = np.ones(samplesize, np.float32)
else:
col_arr = np.zeros(samplesize, np.float32)
col_arr[fullsample_indices] = np.array(sn_tab[col])[sort_sn_tab][first_sn_valid_indices]
df[col] = col_arr
if self._save_pulsar_columns:
self.logger.debug('Loading pulsar columns from the Supernovae table...')
if psrs_present:
first_sn_fullsample_indices = np.searchsorted(full_seed_arr,
sn_seed_arr[first_sn_valid_indices])
if dcos_present:
second_sn_fullsample_indices = np.searchsorted(full_seed_arr,
sn_seed_arr[second_sn_valid_indices])
for col in self.SNTAB_PSR_COLS_TOLOAD:
col_arr1 = np.zeros(samplesize, np.float32)
col_arr2 = np.zeros(samplesize, np.float32)
col_arr1[first_sn_fullsample_indices] = (
np.array(sn_tab[col]))[sort_sn_tab][first_sn_valid_indices]
psr_df[col + '1'] = col_arr1
if dcos_present:
col_arr2[second_sn_fullsample_indices] = (
np.array(sn_tab[col]))[sort_sn_tab][second_sn_valid_indices]
psr_df[col + '2'] = col_arr2
else:
for col in self.SNTAB_PSR_COLS_TOLOAD:
psr_df[col + '1'] = np.zeros(samplesize, np.float32)
if dcos_present:
psr_df[col + '2'] = np.zeros(samplesize, np.float32)
else:
for col in self.SNTAB_BASE_COLS_TOLOAD:
df[col] = np.zeros(samplesize, np.float32)
if self._save_pulsar_columns:
for col in self.SNTAB_PSR_COLS_TOLOAD:
psr_df[col + '1'] = np.zeros(samplesize, np.float32)
if dcos_present:
psr_df[col + '2'] = np.zeros(samplesize, np.float32)
if self._save_pulsar_columns:
if psr_df is None:
psr_df = pd.DataFrame(columns=self.columns_to_load, dtype=np.float32)
if psrs_present:
self.logger.debug('Loading Pulsars table...')
fullsample_indices = np.searchsorted(full_seed_arr, psr_seed_arr)
for col in self.PSRTAB_COLS_TOLOAD:
col_arr = np.zeros(samplesize, np.float32)
col_arr[fullsample_indices] = np.array(psr_tab[col])[sort_psr_tab]
psr_df[col] = col_arr
else:
for col in self.PSRTAB_COLS_TOLOAD:
psr_df[col] = np.zeros(samplesize, np.float32)
table.close()
self.logger.debug('Fixing seeds...')
new_seed_arr = np.empty(samplesize, object)
for i, seed in enumerate(full_seed_arr):
new_seed_arr[i] = f'{seed}_1e6Z_{1e6 * sample_metallicity:.0f}_1e3M1_{1e3 * sample_m1:.0f}'
df['SEED'] = new_seed_arr
total_time = time() - start_time
self.logger.debug(f'Done loading table {hdf_path.name}. Elapsed time: {total_time:.6f} s.')
return df, psr_df
def _process_df(self, df, psr_df=None):
self.logger.debug('Processing table...')
start_time = time()
self.logger.debug('Fixing final masses...')
# df.parallel_apply(symmetrize_masses, axis=1)
df['Mass_PostSN1'] = df.apply(pull_snmass1, axis=1)
df['Mass_PostSN2'] = df.apply(pull_snmass2, axis=1)
df = df.drop(['Mass(SN)', 'Mass(CP)', 'Mass(1)', 'Mass(2)'], axis=1)
self.logger.debug('Computing new mass columns...')
df['Chirp_Mass'] = df.apply(chirp_mass, axis=1)
df['Total_Mass_PostSN'] = df['Mass_PostSN1'] + df['Mass_PostSN2']
df['Initial_Mass(2)'] = df['Initial_Mass(1)'] * df['Mass_Ratio']
df['Total_Mass_ZAMS'] = df['Initial_Mass(1)'] + df['Initial_Mass(2)']
df['Mass_Ratio_PostSN'] = df.apply(lambda x: mass_ratio(x['Mass_PostSN1'], x['Mass_PostSN2']), axis=1)
# df['Mass_Ratio_ZAMS'] = df.apply(lambda x: mass_ratio(x['Initial_Mass(1)'], x['Initial_Mass(2)']), axis=1)
df['Coalescence_Time'] = np.float32(1e6) * df['Coalescence_Time'] # time in years
self.logger.debug('Computing ZAMS orbital period...')
        df['LogP_ZAMS'] = np.zeros(len(df), np.float32)
        logp_finder = LogPFinder()
        for m1 in df['Initial_Mass(1)'].unique():
            logp_finder.set_m1(float(m1))
            subdf = df[df['Initial_Mass(1)'] == m1]
            # Assign through df.loc to avoid pandas' chained-assignment trap, which can silently write to a copy.
            df.loc[subdf.index, 'LogP_ZAMS'] = subdf.apply(
                lambda x: np.float32(logp_finder.a_to_logp_table(x['SemiMajorAxis@ZAMS'],
                                                                 x['Total_Mass_ZAMS'])), axis=1)
self.logger.debug('Fixing categorical columns and column names...')
df['Stellar_Type(1)'] = df['Stellar_Type(1)'].astype(np.int8)
df['Stellar_Type(2)'] = df['Stellar_Type(2)'].astype(np.int8)
df['Binary_Type'] = (df['Stellar_Type(1)'].apply(str) + '+' + df['Stellar_Type(2)'].apply(str)).apply(bintype)
df['Stellar_Type(1)'] = df['Stellar_Type(1)'].astype('category')
df['Stellar_Type(2)'] = df['Stellar_Type(2)'].astype('category')
df['Binary_Type'] = df['Binary_Type'].astype('category')
df.rename(columns={
'Initial_Mass(1)': 'Mass_ZAMS1',
'Initial_Mass(2)': 'Mass_ZAMS2',
'Mass_Ratio': 'Mass_Ratio_ZAMS',
'Stellar_Type(1)': 'Stellar_Type1',
'Stellar_Type(2)': 'Stellar_Type2',
'Metallicity@ZAMS(1)': 'Metallicity_ZAMS',
'SemiMajorAxis@ZAMS': 'SemiMajorAxis_ZAMS',
'Eccentricity@ZAMS': 'Eccentricity_ZAMS',
}, inplace=True)
if self._save_pulsar_columns and psr_df is not None:
psr_col_rename_dict = {
'MT_History': 'PSR_MT_History',
'Mass(1)': 'PSR_Mass1',
'Mass(2)': 'PSR_Mass2',
'Pulsar_Mag_Field(1)': 'PSR_B1',
'Pulsar_Mag_Field(2)': 'PSR_B2',
'Pulsar_Spin_Down(1)': 'PSR_Pdot1',
'Pulsar_Spin_Down(2)': 'PSR_Pdot2',
'Pulsar_Spin_Freq(1)': 'PSR_Omega1',
'Pulsar_Spin_Freq(2)': 'PSR_Omega2',
'SemiMajorAxis': 'PSR_SemiMajorAxis',
'Stellar_Type(1)': 'PSR_Type1',
'Stellar_Type(2)': 'PSR_Type2',
'Time': 'PSR_Time',
'dT': 'PSR_dT'
}
psr_sn_col_rename_dict = {
'Mass_Core@CO(SN)1': 'PSR_SN_Mass_Core1',
'Mass_Core@CO(SN)2': 'PSR_SN_Mass_Core2',
'Mass_He_Core@CO(SN)1': 'PSR_SN_Mass_He_Core1',
'Mass_He_Core@CO(SN)2': 'PSR_SN_Mass_He_Core2',
'Mass_CO_Core@CO(SN)1': 'PSR_SN_Mass_CO_Core1',
'Mass_CO_Core@CO(SN)2': 'PSR_SN_Mass_CO_Core2',
'Fallback_Fraction(SN)1': 'PSR_SN_Fallback_Fraction1',
'Fallback_Fraction(SN)2': 'PSR_SN_Fallback_Fraction2',
'Kick_Magnitude(uK)1': 'PSR_SN_Kick_Magnitude1',
'Kick_Magnitude(uK)2': 'PSR_SN_Kick_Magnitude2',
'SystemicSpeed1': 'PSR_SN_SystemicSpeed1',
'SystemicSpeed2': 'PSR_SN_SystemicSpeed2',
'Time1': 'PSR_SN_Time1',
'Time2': 'PSR_SN_Time2'
}
psr_new_cols = [psr_col_rename_dict[old_col] for old_col in psr_col_rename_dict]
psr_new_cols += [psr_sn_col_rename_dict[old_col] for old_col in psr_sn_col_rename_dict]
psr_df.rename(columns=psr_col_rename_dict, inplace=True)
psr_df.rename(columns=psr_sn_col_rename_dict, inplace=True)
df.rename(columns=psr_col_rename_dict, inplace=True)
df.rename(columns=psr_sn_col_rename_dict, inplace=True)
for col in psr_new_cols:
if col in psr_df.columns:
df[col] = psr_df[col]
del psr_df
gc.collect()
        elif self._save_pulsar_columns:
            raise ValueError('_save_pulsar_columns is True, but psr_df is None.')
        # The pulsar SN columns only exist in df when pulsar columns were loaded; ignore them otherwise.
        df.drop(labels=self.SNTAB_PSR_COLS_TOLOAD, axis=1, inplace=True, errors='ignore')
# pandarallel turns all float32 cols to float64, so we have to convert them back to float32
for col in self.float32_cols:
df[col] = pd.to_numeric(df[col], downcast='float')
elapsed_time = time() - start_time
self.logger.debug(f'Done. Table processed in {elapsed_time} s.')
return df
def gen_subdfs(self, df):
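        """Split df into all systems containing at least one compact object, the bound subset thereof,
        and the bound subset containing only NSs and BHs.
        """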
all_soft_cos_df = df[df['Stellar_Type1'].isin(CO_CODES) | df['Stellar_Type2'].isin(CO_CODES)]
bound_soft_cos_df = all_soft_cos_df[all_soft_cos_df['Unbound'] == 0]
bound_hard_cos_df = bound_soft_cos_df[bound_soft_cos_df['Stellar_Type1'].isin([13, 14]) & \
bound_soft_cos_df['Stellar_Type2'].isin([13, 14])]
return all_soft_cos_df, bound_soft_cos_df, bound_hard_cos_df
def trim(self):
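        """Trim every COMPAS_Output*.h5 file found under compas_output_path and append the results to
        the trimmed parquet dataset at trimmed_output_path.
        """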
self.logger.info('Initializing trimmer...')
self._set_paths()
#pandarallel.initialize()
total_time0 = time()
for path in self.input_paths:
start_time = time()
df, psr_df = self._get_df_from_hdf(path)
if df is None:
warnings.warn('No data loaded from HDF file, stopping trimmer.')
return
else:
df = self._process_df(df, psr_df)
write_time0 = time()
self.logger.info(f'Writing to {self.trimmed_output_path}.')
df.to_parquet(path=self.trimmed_output_path, **PARQUET_SETTINGS)
write_time = time() - write_time0
self.logger.debug(f'Done. Table written in {write_time} s.')
table_time = time() - start_time
self.logger.debug(f'Total table processing time: {table_time} s.')
total_time = time() - total_time0
self.logger.info(f'Done trimming {self.compas_output_path.stem}. Total elapsed time: {total_time} s.')
class LogPFinder:
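    """Find the closest tabled log10(orbital period) for a given semi-major axis.

    Wraps a ZAMSSystemGenerator in order to look up, for a given primary mass,
    the logP columns available in the correlated-binaries sampling table, and
    converts a semi-major axis and total mass to the closest tabled logP.
    """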
def __init__(self):
self.zams = ZAMSSystemGenerator(np.linspace(0.1, 150, 100), BINARIES_CORRELATED_TABLE_PATH)
self.zams.setup_sampler()
self.m1group = None
self.logp_options = None
self.logp_categories = None
def set_m1(self, m1):
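        """Point the finder at primary mass m1 and cache the logP options available for it in the sampling table."""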
self.zams.m1_array = m1
self.m1group = self.zams._get_m1()[1]
self.logp_options = []
self.logp_categories = []
for group in self.m1group._v_children:
self.logp_options.append(np.float32(self.m1group[group].title))
self.logp_categories.append(self.m1group[group].title)
self.logp_options = np.array(self.logp_options)
self.logp_categories = np.array(self.logp_categories)
sorting = np.argsort(self.logp_options)
self.logp_options = self.logp_options[sorting]
self.logp_categories = self.logp_categories[sorting]
def get_closest_logp(self, logp):
closest_logp_i = np.argmin(np.abs(self.logp_options - logp))
closest_logp = self.logp_categories[closest_logp_i]
return closest_logp
def a_to_logp_table(self, a, m_tot):
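        """Convert a semi-major axis to the closest tabled log10(orbital period).

        Applies Kepler's third law, P = 2*pi*sqrt(a**3 / (G*M)), with the
        semi-major axis a in AU and the total mass M in solar masses, and
        returns the tabled log10(P/d) closest to the result.
        """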
cgs_a = (a * u.au).to(u.cm)
g = np.sqrt(4 * np.pi ** 2 / (ct.G.cgs * m_tot * ct.M_sun))
cgs_p = g * cgs_a ** (3 / 2)
logp = np.log10(cgs_p.to(u.d).value)
logp_table = self.get_closest_logp(logp)
# logp_table = np.around(logp, 4)
return logp_table
class CompactBinaryPopulation:
"""Cross-match generated ZAMS populations and COMPAS output to produce a binary compact object population."""
def __init__(self, print_progress=True, parent_logger=None, canonical_distributions=False):
self.zams_population_path = None
self.compas_feh_dict = dict()
self.compas_feh_options = None
self.zams_redshift_feh_dict = dict()
self.n_processes = None
self.crossmatch_tol = 1e-5
self.print_progress = print_progress
self.canonical_initial_distributions = canonical_distributions
self._compas_proc_output_dir_path = None
self._compas_grids_folder = None
self.logger = self._get_logger(parent_logger)
@property
def compas_proc_output_dir_path(self):
if self._compas_proc_output_dir_path is None:
if self.canonical_initial_distributions:
self._compas_proc_output_dir_path = COMPAS_21XX_PROC_OUTPUT_DIR_PATH
else:
self._compas_proc_output_dir_path = COMPAS_12XX_PROC_OUTPUT_DIR_PATH
return self._compas_proc_output_dir_path
@property
def compas_grids_folder(self):
if self._compas_grids_folder is None:
if self.canonical_initial_distributions:
self._compas_grids_folder = COMPAS_21XX_GRIDS_PATH
else:
self._compas_grids_folder = COMPAS_12XX_GRIDS_PATH
return self._compas_grids_folder
def _get_logger(self, parent_logger):
if parent_logger is None:
loggername = '.'.join([__name__, self.__class__.__name__])
log_path = Path(LOG_PATH, loggername, datetime.now().strftime('%d-%m-%Y_%H:%M:%S.log'))
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = create_logger(name=loggername, fpath=log_path, propagate=False)
else:
loggername = '.'.join([__name__, self.__class__.__name__])
logger = logging.getLogger(name=loggername)
logger.setLevel(logging.DEBUG)
return logger
def _get_compas_samples(self):
"""Load COMPAS output paths and corresponding metallicities."""
for sample_folder in self.compas_proc_output_dir_path.glob('*.parquet'):
feh = float(sample_folder.stem.split('=')[1].split('.')[0])/1e2
self.compas_feh_dict[feh] = sample_folder
self.compas_feh_options = np.array(sorted(self.compas_feh_dict.keys()))
def _get_zams_samples(self):
"""Load ZAMS sample paths and corresponding redshift, metallicity and other parameters."""
for sample_file in IGIMF_ZAMS_DIR_PATH.glob('z=*_feh=*_logsfr=*_1e4ZOH=*_1e2Mgal=*_1e2logn=*_logpool=*_' \
'igimf_zams_sample.parquet'):
z, feh, logsfr, zoh, mgal, logn, logpool, *_ = sample_file.stem.split('_')
z = float(z.split('=')[1]) # sample redshift
feh = float(feh.split('=')[1]) # sample [Fe/H]
logsfr = float(logsfr.split('=')[1]) # sample log10(SFR)
zoh = str(float(zoh.split('=')[1])/1e4) # sample Z_O/H
mgal = str(float(mgal.split('=')[1])/1e2) # sample galaxy stellar mass
logn = str(float(logn.split('=')[1])/1e2) # sample log10(galaxy number density)
logpool = float(logpool.split('=')[1]) # sample log10(mass pool size)
if z not in self.zams_redshift_feh_dict:
self.zams_redshift_feh_dict[z] = dict()
sample_params = {'redshift' : z,
'feh' : feh,
'logsfr' : logsfr,
'zoh' : zoh,
'mgal' : mgal,
'logn' : logn,
'logpoolsize' : logpool,
'path' : sample_file
}
self.zams_redshift_feh_dict[z][feh] = sample_params
def _crossmatch_df(self, zams_df, compas_sample_path, feh):
"""Cross-match a dataframe holding the ZAMS population with the nearest metallicity COMPAS output."""
# First, set up arrays to hold columns of the final cross-matched dataframe.
sample_size = len(zams_df)
m1_col = np.zeros(sample_size, np.float32)
m2_col = np.zeros(sample_size, np.float32)
coalescence_t_col = np.zeros(sample_size, np.float32)
unbound_col = np.ones(sample_size, np.float32) # whether the binary became unbound following a supernova (bool)
seed_col = np.empty(sample_size, object) # unique identifier from binary parameters
binary_type_col = np.empty(sample_size, object) # BHBH, BHNS, NSBH, NSNS or isolated (for mass accounting)
prev_progress = 0
progress = 0
hit_count = 0 # successful matches counter
iteration_times = [] # for progress updates to stdout
matched_df_i = 0 # index
for zams_i, row in zams_df.iterrows():
time0 = time()
# In the ZAMS sample, the 'Found' m1, as well as LogP, 'Found' q and e, are values taken directly from the
# same sampling table from which the initial binary grid for COMPAS is built, allowing for an exact match
# within the table's original float32 precision.
zams_m1 = np.float32(row['Mass_ZAMS1_Found'])
zams_logp = np.float32(row['LogOrbitalPeriod_ZAMS'])
zams_q = np.float32(row['MassRatioFound_ZAMS'])
zams_e = np.float32(row['Eccentricity_ZAMS'])
isolated_star = False
unevolved_binary = False
if zams_logp == 0.0:
isolated_star = True
elif zams_m1*zams_q < 0.1:
unevolved_binary = True
if isolated_star:
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound' : False,
'Coalescence_Time' : 0,
'SEED' : 'isolated',
'Binary_Type' : 'isolated'}
match_df = pd.DataFrame(match_dict, index=[0])
elif unevolved_binary:
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound': False,
'Coalescence_Time': 0,
'SEED': 'unevolved',
'Binary_Type': 'unevolved'}
match_df = pd.DataFrame(match_dict, index=[0])
else:
                # The COMPAS sample files are structured hierarchically: each first-level folder corresponds to
                # an m1, each second-level folder to an (m1, logP) pair, and each third-level file to an
                # (m1, logP, q, e) set. The m1 and logP are stored in the folder names, against which the
                # ZAMS binaries are matched.
compas_path_options = list(compas_sample_path.glob('*'))
m1_options = np.array([np.float32(path.name.split('=')[1]) for path in compas_path_options])
                # Finding the closest parameters, instead of requiring an exact match, avoids possible
                # floating-point mismatches.
m1path_closest_i = np.argmin(np.abs(m1_options - zams_m1))
compas_m1_path = compas_path_options[m1path_closest_i]
logp_path_options = list(compas_m1_path.glob('*'))
logp_options = np.array([np.float32(path.name.split('=')[1]) for path in logp_path_options])
logp_path_closest_i = np.argmin(np.abs(logp_options - zams_logp))
compas_logp_path = logp_path_options[logp_path_closest_i]
tol = self.crossmatch_tol
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Eccentricity_ZAMS', '<=', zams_e + np.float32(tol)),
('Eccentricity_ZAMS', '>=', zams_e - np.float32(tol)),
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(tol)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(tol))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
time1 = time() - time0
iteration_times.append(time1)
tolerance_warning = True
while match_df.SEED.nunique() > 1:
if self.canonical_initial_distributions:
# Because in this case e=0 always, for a given (m1,logp,q) there will be 10 repeated (q,e=0) lines
# in the table, which get passed to COMPAS.
match_qs = match_df.Mass_Ratio_ZAMS.unique()
match_es = match_df.Eccentricity_ZAMS.unique()
if len(match_qs) == 1 and len(match_es) == 1:
match_df = match_df.iloc[[0]]
continue
if tolerance_warning:
print(f'Lowering tolerance for {zams_m1, zams_logp, zams_q, zams_e} in {compas_logp_path}')
tolerance_warning = False
tol *= 0.9
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Eccentricity_ZAMS', '<=', zams_e + np.float32(tol)),
('Eccentricity_ZAMS', '>=', zams_e - np.float32(tol)),
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(tol)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(tol))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
if tol <= 1e-8:
print('Tolerance too low, checking gridfiles...')
gridfile_folder = Path(self.compas_grids_folder, compas_sample_path.name.split('.')[0],
'run_gridfiles')
m1_gridfile_options = list(gridfile_folder.glob('*_grid.txt'))
m1_grid_options = list(file.name.split('_')[1].split('=')[1] for file in m1_gridfile_options)
m1 = np.float32(compas_m1_path.name.split('=')[1])
closest_m1_i = np.argmin(np.abs(np.float32(m1_grid_options) - m1))
m1_grid = m1_gridfile_options[closest_m1_i]
match_seeds = match_df.SEED.to_numpy()
seed_suffix = match_seeds[0].split('_')[1:]
match_seeds = [seed.split('_')[0] for seed in match_seeds]
print(f'Looking for {match_seeds} in {m1_grid}')
                    def look_for_seeds(match_seeds):
                        match_lines = []
                        with m1_grid.open('r') as f:
                            for line in f:
                                # Iterate over a copy: matched seeds are removed from the list below.
                                for seed in list(match_seeds):
                                    if seed in line:
                                        match_lines.append([seed, line])
                                        match_seeds.remove(seed)
                                if len(match_seeds) == 0:
                                    return match_lines
                        return match_lines
                    match_lines = look_for_seeds(match_seeds)
matched_seed = 0
prev_d_logp = 10
for seed_line in match_lines:
seed, line = seed_line
settings = line.split(' ')
logp_i = settings.index('--orbital-period')
logp = np.log10(np.float32(settings[logp_i+1]))
d_logp = np.abs(logp - zams_logp)
if d_logp < prev_d_logp:
prev_d_logp = d_logp
matched_seed = seed
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('SEED', '==', '_'.join([matched_seed, *seed_suffix]))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
if match_df.SEED.nunique() == 0:
full_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
columns=[
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
if len(full_df.Eccentricity_ZAMS.unique()) == 1 and float(full_df.Eccentricity_ZAMS.unique()[0]) == 0.0:
max_q = max(full_df.Mass_Ratio_ZAMS.unique())
if zams_q > max_q:
print('Close massive binary')
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound': False,
'Coalescence_Time': 0,
'SEED': 'merged_at_birth',
'Binary_Type': 'merged_at_birth'}
match_df = pd.DataFrame(match_dict, index=[0])
else:
full_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(tol)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(tol))
],
columns=[
'Eccentricity_ZAMS'
],
use_threads=True
)
if zams_e > max(full_df.Eccentricity_ZAMS.unique()):
print('Eccentric massive binary')
match_dict = {'Mass_PostSN1': 0,
'Mass_PostSN2': 0,
'Unbound': False,
'Coalescence_Time': 0,
'SEED': 'merged_at_birth',
'Binary_Type': 'merged_at_birth'}
match_df = pd.DataFrame(match_dict, index=[0])
if match_df.SEED.nunique() == 1:
hit_count += 1
match = match_df.iloc[0]
m1_col[matched_df_i] = match['Mass_PostSN1']
m2_col[matched_df_i] = match['Mass_PostSN2']
coalescence_t_col[matched_df_i] = match['Coalescence_Time']
unbound_col[matched_df_i] = match['Unbound']
seed_col[matched_df_i] = match['SEED']
binary_type_col[matched_df_i] = match['Binary_Type']
matched_df_i += 1
progress = 100*matched_df_i/sample_size
elif match_df.SEED.nunique() == 0:
print(zams_m1, zams_logp, zams_q, np.float32(row['MassRatioChoice_ZAMS']), zams_e)
print(compas_logp_path)
match_df = pd.read_parquet(
path=compas_logp_path,
engine='pyarrow',
filters=[
('Eccentricity_ZAMS', '<=', zams_e + np.float32(5e-1)),
('Eccentricity_ZAMS', '>=', zams_e - np.float32(5e-1)),
('Mass_Ratio_ZAMS', '<=', zams_q + np.float32(5e-1)),
('Mass_Ratio_ZAMS', '>=', zams_q - np.float32(5e-1))
],
columns=[
'Mass_PostSN1',
'Mass_PostSN2',
'Unbound',
'Coalescence_Time',
'SEED',
'Binary_Type',
'Mass_Ratio_ZAMS',
'Eccentricity_ZAMS'
],
use_threads=True
)
print(match_df)
print(len(match_df), 'LEN')
print('q,e pairs', np.array([match_df.Mass_Ratio_ZAMS.to_numpy(),
match_df.Eccentricity_ZAMS.to_numpy()]).T)
warnings.warn('Could not match system. Consider raising parameter match tolerances.')
break
else:
print(zams_m1, zams_logp, zams_q, zams_e)
print(match_df)
warnings.warn('Multiple matches found in COMPAS sample. Consider lowering parameter match tolerances.')
break
if self.print_progress and progress > prev_progress+1:
elapsed_time = np.sum(iteration_times)
avg_match_rate = matched_df_i/elapsed_time
expected_time = (sample_size-matched_df_i) / avg_match_rate
print(f'Progress: {progress:.4f} % {format_time(elapsed_time)}<'
f'{format_time(expected_time)} at {avg_match_rate:.2f} match / s')
prev_progress = progress
# As zams_df already holds the initial parameter columns, simply add the final parameter columns to it.
zams_df['Mass_PostSN1'] = m1_col
zams_df['Mass_PostSN2'] = m2_col
zams_df['Coalescence_Time'] = coalescence_t_col
zams_df['Unbound'] = unbound_col
zams_df['SEED'] = seed_col
zams_df['Binary_Type'] = binary_type_col
return zams_df
def _pick_sample(self):
sample_options = []
for z in self.zams_redshift_feh_dict.keys():
zams_feh_dict = self.zams_redshift_feh_dict[z]
for feh in zams_feh_dict.keys():
zams_dict = zams_feh_dict[feh]
zams_z = zams_dict['redshift']
zams_feh = zams_dict['feh']
sample_option = f'z={zams_z:.3f}, [Fe/H]={zams_feh:.2f}'
sample_options.append((sample_option,
(zams_z, zams_feh)))
question = [
inquirer.List(
'choice',
message='Please pick a ZAMS sample to match to the COMPAS output',
choices=sample_options
)
]
sample_choice = inquirer.prompt(question)['choice']
return sample_choice
def _crossmatch_sample(self, zams_sample_redshift, zams_sample_feh):
zams_sample_dict = self.zams_redshift_feh_dict[zams_sample_redshift][zams_sample_feh]
zams_sample_path = zams_sample_dict['path']
self.logger.info(f'Now matching z={zams_sample_redshift}, [Fe/H]={zams_sample_feh} ZAMS sample ' \
f'at {zams_sample_path}.')
feh_match = self.compas_feh_options[np.argmin(np.abs(self.compas_feh_options - zams_sample_feh))]
compas_sample_path = self.compas_feh_dict[feh_match]
        # The file suffix and the log message record the matched COMPAS metallicity, feh_match.
        matched_sample_path = Path(COMPACT_OBJ_DIR_PATH,
                                   zams_sample_path.stem + f'_compasfeh={feh_match:.2f}.snappy.parquet')
        self.logger.info(f'Closest COMPAS sample: [Fe/H] = {feh_match} at {compas_sample_path}')
zams_df = pd.read_parquet(path=zams_sample_path,
engine='pyarrow',
use_threads=True)
sub_zams_dfs_array = np.array_split(zams_df, self.n_processes)
compas_sample_path_array = np.tile(compas_sample_path, self.n_processes)
feh_match_array = np.tile(feh_match, self.n_processes)
matched_df_list = []
with ProcessPoolExecutor(self.n_processes) as executor:
#for df in executor.map(crossmatch, sub_zams_dfs_array):
for df in executor.map(self._crossmatch_df, sub_zams_dfs_array, compas_sample_path_array, feh_match_array):
matched_df_list.append(df)
matched_df = pd.concat(matched_df_list)
matched_df.reset_index(inplace=True, drop=True)
del matched_df_list
gc.collect()
matched_m1s = np.unique(matched_df['Mass_ZAMS1_Found'])
for m1 in matched_m1s:
temp_df = matched_df[matched_df['Mass_ZAMS1_Found'] == m1]
temp_df.to_parquet(path=matched_sample_path,
engine='pyarrow',
compression='snappy',
partition_cols=['Mass_ZAMS1_Found',
'LogOrbitalPeriod_ZAMS'],
use_threads=True)
del temp_df
gc.collect()
del matched_df
gc.collect()
def _crossmatch_single_sample(self):
sample_choice = self._pick_sample()
self._crossmatch_sample(*sample_choice)
def _crossmatch_full_sample(self):
for z in self.zams_redshift_feh_dict.keys():
zams_feh_dict = self.zams_redshift_feh_dict[z]
for feh in zams_feh_dict.keys():
self._crossmatch_sample(z, feh)
def crossmatch_sample(self):
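        """Cross-match one or all ZAMS samples in IGIMF_ZAMS_DIR_PATH with the closest-metallicity COMPAS
        output, prompting interactively for the choice and for the number of parallel processes.
        """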
self.logger.info('Initializing...')
self._get_zams_samples()
self._get_compas_samples()
print('Please enter the number of parallel processes to run:')
self.n_processes = int(input())
choices = ['All', 'Single']
question = [
inquirer.List(
'choice',
message=f'Cross-match a single ZAMS sample or all ZAMS samples in {IGIMF_ZAMS_DIR_PATH}?',
choices=choices
)
]
choice = inquirer.prompt(question)['choice']
if choice == 'All':
self._crossmatch_full_sample()
else:
self._crossmatch_single_sample()
class MergerRates:
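    """Compute volumetric compact-binary merger rates from cross-matched samples.

    Loads the cross-matched samples in sample_dir_path, selects bound systems
    of a chosen merger class, bins them in merger age, weights each
    (z_ZAMS, [Fe/H]) sub-sample by its star-forming mass and by the chosen
    star formation rate density model, and integrates over [Fe/H] and z_ZAMS
    to obtain merger rates in yr-1 Gpc-3.

    A minimal usage sketch (paths are placeholders; zams_grid_path points to
    the pickled SFRD grid dataframe):

    >>> rates = MergerRates(Path('matched_samples'), Path('sfrd_grid.pkl'),
    ...                     sfrd_model='midmet')  # doctest: +SKIP
    >>> rates.load_sample_to_memory()  # doctest: +SKIP
    >>> rates.set_merger_df('NSNS')  # doctest: +SKIP
    """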
COLS_TO_LOAD_ESSENTIAL = [
'Unbound',
'Coalescence_Time',
'Mass_ZAMS1_Found',
'Mass_PostSN1',
'Mass_PostSN2',
'Binary_Type',
'SystemMass',
'CompanionNumber'
]
COLS_TO_LOAD_EXTENDED = [
'Mass_ZAMS2_Found',
'Mass_ZAMS3_Found',
'Mass_ZAMS4_Found',
'Mass_ZAMS5_Found',
'LogOrbitalPeriod_ZAMS',
'Eccentricity_ZAMS'
] + COLS_TO_LOAD_ESSENTIAL
CATEGORY_COLS = [
'Unbound',
'Binary_Type',
'CompanionNumber'
]
def __init__(self, sample_dir_path, zams_grid_path, sfrd_model, invariant_imf=False,
extended_load=False, load_bcos_only=False, min_redshift=0.0, max_redshift=10.0, min_feh=-5.0,
max_feh=0.5, progenitor_m_min=0.8, progenitor_m_max=150.0, convert_time=True, new_method=False,
parent_logger=None):
self.sample_dir_path = sample_dir_path
self.zams_grid_path = zams_grid_path
self.merger_class = None
self.sfrd_model = sfrd_model
self.invariant_imf = invariant_imf
self.canon_sfrd = invariant_imf
self.extended_load = extended_load
self.load_bcos_only = load_bcos_only
self.min_redshift = min_redshift
self.max_redshift = max_redshift
self.min_feh = min_feh
self.max_feh = max_feh
self.time_resolution = None
self.sample_progenitor_m_min = progenitor_m_min
self.sample_progenitor_m_max = progenitor_m_max
self._star_m_min = min(0.08, self.sample_progenitor_m_min)
self._star_m_max = max(150.0, self.sample_progenitor_m_max)
self._convert_time = convert_time
self._new_method = new_method
self.sfrd = self._get_sfrd()
self._cols_to_load = None
self._sample_paths = None
self.sample_properties_dict = dict()
self.sample_mtot_per_ncomp_dict = dict()
self.sample_starforming_mass_dict = dict()
self.sample_redshift_arr = None
self.ip_redshift_arr = None
self.sample_feh_arr = None
self.sample_redshift_feh_bins_dict = dict()
self.sample_df = None
self.merger_df = None
self._full_age_bin_edges = None
self._full_age_bin_widths = None
self._full_age_bin_centers = None
self._physical_age_bin_edges = None
self._physical_age_bin_widths = None
self._physical_age_bin_centers = None
self._dz_dfeh_mrates_dict = dict()
self._dz_dfeh_dage_mrates_arr = None
self._per_age_redshift_feh_fits = None
self._dz_dage_mrates_arr = None
self._dz_dpopage_mrates_arr = None
self._dpopage_interpolators = list()
self._ip_dz_dpopage_mrates_arr = None
self._ip_dz_dage_mrates_arr = None
self._per_age_redshift_fits = None
self.mrate_arr = None
self.logger = self._get_logger(parent_logger)
def _get_logger(self, parent_logger):
if parent_logger is None:
loggername = '.'.join([__name__, self.__class__.__name__])
log_path = Path(LOG_PATH, loggername, datetime.now().strftime('%d-%m-%Y_%H:%M:%S.log'))
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = create_logger(name=loggername, fpath=log_path, propagate=False)
else:
loggername = '.'.join([__name__, self.__class__.__name__])
logger = logging.getLogger(name=loggername)
logger.setLevel(logging.DEBUG)
return logger
@staticmethod
def _get_total_bco_mass(row):
"""Calculate the total binary compact object mass for a dataframe row."""
return row.Mass_PostSN1 + row.Mass_PostSN2
@staticmethod
def _get_chirp_mass(row):
"""Calculate the chirp mass for a dataframe row."""
m1 = row.Mass_PostSN1
m2 = row.Mass_PostSN2
if m1 == 0:
return 0
else:
return (m1 * m2) ** (3 / 5) / (m1 + m2) ** (1 / 5)
@staticmethod
def _get_merger_age(df):
"""Calculate Universe age at merger for a dataframe row."""
redshift_zams = df.Redshift_ZAMS.to_numpy()
age_zams = cosmo.age(redshift_zams).value
t_col = df.Coalescence_Time.to_numpy()
age_merger = age_zams + t_col
return age_merger
@staticmethod
def _get_redshift_from_age(age_arr):
"""Calculate redshift from Universe age."""
redshift_arr = newton(lambda z_arr: cosmo.age(z_arr).value - age_arr, np.zeros(len(age_arr)))
return redshift_arr
@staticmethod
def _get_bin_frequency_heights(x_arr, x_bin_edges):
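        """Count the elements of x_arr falling in each bin of x_bin_edges and divide each count by the
        bin width; values below the first edge are discarded.
        """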
        bin_frequencies = np.zeros(x_bin_edges.shape[0] - 1, np.float32)
        x_arr = x_arr[x_arr >= x_bin_edges[0]]  # discard values below the first bin edge
        for i, (edge0, edge1) in enumerate(zip(x_bin_edges[:-1], x_bin_edges[1:])):
            remaining_x_arr = x_arr[x_arr >= edge1]
            bin_count = len(x_arr) - len(remaining_x_arr)  # values falling in [edge0, edge1)
            x_arr = remaining_x_arr
            bin_frequencies[i] = bin_count / (edge1 - edge0)
        return bin_frequencies
return bin_frequencies
@staticmethod
def _get_linear_fit(xy0, xy1):
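        """Return [x0, x1, slope, intercept] of the straight line through points xy0 = (x0, y0) and xy1 = (x1, y1)."""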
x0, y0 = xy0
x1, y1 = xy1
slope = (y1 - y0) / (x1 - x0)
intercept = y0 - slope * x0
return np.array([x0, x1, slope, intercept])
@staticmethod
def _get_linear_fit_area(linear_fit):
x0, x1, slope, intercept = linear_fit
if x0 == x1:
return 0.0
else:
return slope * (x1 * x1 - x0 * x0) / 2 + intercept * (x1 - x0)
@property
def sfrd_model(self):
"""Which star formation rate density model from Chrulinska et al. (2020) to use."""
return self._sfrd_model
@sfrd_model.setter
def sfrd_model(self, model):
models = ['lowmet', 'midmet', 'highmet']
if model not in models:
raise ValueError(f'sfrd_model must be one of {models}.')
self._sfrd_model = model
@property
def canon_sfrd(self):
"""Whether the star formation rate grid to be loaded should assume a Canonical Kroupa IMF."""
return self._canon_sfrd
    @canon_sfrd.setter
    def canon_sfrd(self, invariant_imf):
        self._canon_sfrd = bool(invariant_imf)
@property
def cols_to_load(self):
if self._cols_to_load is None:
if self.extended_load:
self._cols_to_load = self.COLS_TO_LOAD_EXTENDED
else:
self._cols_to_load = self.COLS_TO_LOAD_ESSENTIAL
return self._cols_to_load
@property
def sample_paths(self):
if self._sample_paths is None:
self._sample_paths = list(self.sample_dir_path.glob('*.parquet'))
return self._sample_paths
def _get_sfrd(self):
sfrd = ChruslinskaSFRD(self.sfrd_model, self.canon_sfrd, per_redshift_met_bin=self._new_method)
sfrd.set_grid()
return sfrd
def _set_merger_class(self, class_):
classes = ['BHBH', 'BHNS', 'NSBH', 'NSNS']
if class_ not in classes:
raise ValueError(f'merger_class must be one of {classes}.')
elif class_ == 'NSBH' or class_ == 'BHNS':
class_ = ['BHNS', 'NSBH']
else:
class_ = [class_]
self.merger_class = class_
def _set_sample_properties_dict(self):
for path in self.sample_paths:
redshift, feh, logsfr, zoh1e4, mgal1e2, logn1e2, logpool, *_, compasfeh = path.stem.split('_')
redshift = np.float32(redshift.split('=')[1]) # z_{ZAMS}
feh = np.float32(feh.split('=')[1]) # [Fe/H]
logsfr = np.float32(logsfr.split('=')[1]) # log10(SFR)
zoh = np.float32(zoh1e4.split('=')[1]) / 1e4 # Z_{O/H}
mgal = np.float32(mgal1e2.split('=')[1]) / 1e2 # stellar mass of the corresponding galaxy
logn = np.float32(logn1e2.split('=')[1]) / 1e2 # log(number of stars in sample)
logpool = np.float32(logpool.split('=')[1]) # log(length of the original mass sampling pool)
compas_feh = np.float32(compasfeh.split('=')[1].rstrip('.snappy')) # closest [Fe/H] from the COMPAS samples
sample_properties = {
'redshift': redshift,
'feh': feh,
'compas_feh': compas_feh,
'zoh': zoh,
'logsfr': logsfr,
'mgal': mgal,
'logn': logn,
'logpool': logpool,
'path': path
}
if redshift not in self.sample_properties_dict:
self.sample_properties_dict[redshift] = dict()
self.sample_properties_dict[redshift][feh] = sample_properties
def _fix_df_dtypes(self, df):
for col in self.CATEGORY_COLS:
if df[col].dtype != 'category':
df[col] = df[col].astype('category')
for col in self.cols_to_load:
if col not in self.CATEGORY_COLS and df[col].dtype != 'float32':
df[col] = df[col].astype('float32')
return df
def _get_sample_df(self, redshift_batch):
sample_df = pd.DataFrame(columns=self.cols_to_load)
sample_n = sum([len(self.sample_properties_dict[redshift].keys()) for redshift in redshift_batch])
sample_counter = 1
for redshift in redshift_batch:
redshift_dict = self.sample_properties_dict[redshift]
for feh_ in redshift_dict:
sample_dict = redshift_dict[feh_]
z_zams = sample_dict['redshift']
feh = sample_dict['feh']
self.logger.info(f'Loading z={z_zams:.2f}, [Fe/H]={feh:.2f} ({sample_counter}/{sample_n}).')
if self.load_bcos_only:
filters = [('Binary_Type', 'in', ['BHBH', 'BHNS', 'NSBH', 'NSNS'])]
else:
filters = None
df = pd.read_parquet(
path=sample_dict['path'],
columns=self.cols_to_load,
filters=filters,
                    # Although loading only BCOs would be much faster and cheaper, for now we need to load
                    # all binary types in order to properly count the sample size when normalizing the IMF
                    # for computing the star-forming mass. In the future this information will be computed,
                    # and stored, while generating the initial sample.
engine='pyarrow',
use_threads=True
)
df['Redshift_ZAMS'] = [z_zams] * len(df)
df['FeH'] = [feh] * len(df)
                df = self._fix_df_dtypes(df)
sample_df = pd.concat((sample_df, df))
self.logger.info(f'Added {len(df):.2e} rows to sub-sample dataframe.')
del df
gc.collect()
sample_counter += 1
sample_df.reset_index(inplace=True, drop=True)
sample_df = self._fix_df_dtypes(sample_df)
return sample_df
def _set_sample_df(self, batches_n=1):
redshift_batches = np.array_split(np.array([*self.sample_properties_dict.keys()]), batches_n)
self.sample_df = pd.DataFrame(columns=self.cols_to_load)
self.sample_df = self._fix_df_dtypes(self.sample_df)
for redshift_batch in redshift_batches:
sample_subdf = self._get_sample_df(redshift_batch)
self.sample_df = pd.concat((self.sample_df, sample_subdf))
            self.logger.info(f'Added {len(sample_subdf):.2e}-row sub-sample dataframe to the sample dataframe.')
del sample_subdf
gc.collect()
sample_df_memory_usage_gb = self.sample_df.memory_usage(deep=True).sum() / (1024 ** 3)
self.logger.info(f'Done loading subsample to memory. Total memory usage from sample dataframe: '
f'{sample_df_memory_usage_gb:.2f} GB')
sleep(10)
self.sample_df.reset_index(inplace=True, drop=True)
#self.sample_df = self._fix_df_dtypes(self.sample_df)
self.logger.info(f'Done loading {len(self.sample_df):.2e} rows to sample dataframe.')
def load_sample_to_memory(self, batches_n=1):
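        """Load all cross-matched sample files into sample_df, splitting the sampled redshifts into
        batches_n loading batches.
        """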
        self.logger.info('Searching the sample folder for valid files...')
self._set_sample_properties_dict()
self.logger.info(f'Found {len(self.sample_paths)} valid files, loading to sample dataframe...')
self._set_sample_df(batches_n)
        sample_df_memory_usage_gb = self.sample_df.memory_usage(deep=True).sum() / (1024 ** 3)
        ncomp_summary = self.sample_df.CompanionNumber.value_counts()
        self.logger.info(f'Total memory usage from sample dataframe: {sample_df_memory_usage_gb:.2f} GB. '
                         f'Multiplicity summary: \n{ncomp_summary} \n'
                         f'Please check this is the correct sample before proceeding.')
def get_mtot_per_ncomp(self):
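        """Sum the total system mass per companion number for each (redshift, [Fe/H]) sub-sample."""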
self.logger.info('Getting raw sample star-forming masses...')
sample_n = len(self.sample_paths)
sample_counter = 1
for redshift in self.sample_properties_dict:
if redshift not in self.sample_mtot_per_ncomp_dict:
self.sample_mtot_per_ncomp_dict[redshift] = dict()
redshift_dict = self.sample_mtot_per_ncomp_dict[redshift]
for feh in self.sample_properties_dict[redshift]:
self.logger.info(f'Working for z={redshift:.2f}, [Fe/H]={feh:.2f} sample ({sample_counter}/{sample_n})')
if feh not in redshift_dict:
redshift_dict[feh] = dict()
feh_dict = redshift_dict[feh]
                for ncp in self.sample_df.CompanionNumber.unique():
                    # np.logical_and takes only two array arguments, so reduce combines the three masks.
                    ncp_df = self.sample_df[np.logical_and.reduce((self.sample_df.Redshift_ZAMS == redshift,
                                                                   self.sample_df.FeH == feh,
                                                                   self.sample_df.CompanionNumber == ncp))]
sf_mass = ncp_df.SystemMass.sum()
feh_dict[ncp] = sf_mass
del ncp_df
gc.collect()
sample_counter += 1
@staticmethod
def _kroupa_integral(k1, k2, m0, m1, variable='number'):
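        """Integrate a two-part Kroupa power law between masses m0 and m1.

        k1 and k2 are the normalization constants below and above the 0.5 Msun
        break. With variable='number' the integrand is dN/dm; with
        variable='mass' it is m*dN/dm.
        """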
        a1 = 1.3
        a2 = 2.3
        if variable == 'mass':
            # Integrating m * dN/dm lowers each power-law exponent by one.
            a1 -= 1
            a2 -= 1
        if m0 < 0.5:
            if m1 < 0.5:
                integral = k1 / (1 - a1) * (m1 ** (1 - a1) - m0 ** (1 - a1))
            else:
                integral = k1 / (1 - a1) * (0.5 ** (1 - a1) - m0 ** (1 - a1))
                integral += k2 / (1 - a2) * (m1 ** (1 - a2) - 0.5 ** (1 - a2))
        else:
            integral = k2 / (1 - a2) * (m1 ** (1 - a2) - m0 ** (1 - a2))
        return integral
def set_merger_df(self, merger_class):
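        """Build merger_df from sample_df: bound systems of the requested merger class that merge within
        the age of the Universe, with merger age and merger redshift columns added.
        """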
self._set_merger_class(merger_class)
#pandarallel.initialize(progress_bar=False)
        self.merger_df = self.sample_df[(self.sample_df.Unbound == 0) &
                                        (self.sample_df.Binary_Type.isin(self.merger_class))].copy()
        # remove systems that would not have merged by now no matter how old the progenitors
        self.merger_df['Coalescence_Time'] = self.merger_df['Coalescence_Time'] / 1e9  # yr -> Gyr
        self.merger_df = self.merger_df[self.merger_df.Coalescence_Time <= cosmo.age(0).value]
# set up the rest of the dataframe
#self.merger_df['Chirp_Mass'] = self.merger_df.parallel_apply(self._get_chirp_mass, axis=1)
#self.merger_df['Total_Mass'] = self.merger_df.parallel_apply(self._get_total_bco_mass, axis=1)
self.merger_df['Age_Merger'] = self._get_merger_age(self.merger_df)
self.merger_df['Redshift_Merger'] = self._get_redshift_from_age(self.merger_df.Age_Merger.to_numpy())
def _set_bins(self, time_resolution=0.1):
self.time_resolution = time_resolution
self._full_age_bin_edges = np.arange(cosmo.age(self.max_redshift).value,
self.merger_df.Age_Merger.max() + self.time_resolution,
self.time_resolution) # Gyr
self._full_age_bin_widths = np.tile([self.time_resolution], self._full_age_bin_edges.shape[0] - 1)
self._full_age_bin_centers = np.array([(age0+age1)/2 for age0, age1 in zip(self._full_age_bin_edges[:-1],
self._full_age_bin_edges[1:])])
self._physical_age_bin_edges = self._full_age_bin_edges[self._full_age_bin_edges < cosmo.age(0).value]
self._physical_age_bin_widths = np.tile([self.time_resolution], self._physical_age_bin_edges.shape[0] - 1)
self._physical_age_bin_centers = np.array([
(age0+age1)/2 for age0, age1 in zip(self._physical_age_bin_edges[:-1], self._physical_age_bin_edges[1:])
])
self._full_redshift_bin_edges = self._get_redshift_from_age(self._full_age_bin_edges)
#self._full_redshift_bin_edges = np.array([self._get_redshift_from_age(age) for age in self._full_age_bin_edges])
self._physical_redshift_bin_edges = self._full_redshift_bin_edges[self._full_redshift_bin_edges >= 0.0]
sample_redshifts = list()
sample_fehs = list()
for redshift in self.sample_properties_dict:
sample_redshifts.append(redshift)
subsample_fehs = list()
for feh in self.sample_properties_dict[redshift]:
subsample_fehs.append(feh)
subsample_fehs = np.sort(subsample_fehs)
sample_fehs.append(subsample_fehs)
redshift_sorted_indices = np.argsort(sample_redshifts)
self.sample_redshift_arr = np.array(sample_redshifts)[redshift_sorted_indices]
self.sample_feh_arr = np.array(sample_fehs)[redshift_sorted_indices]
sfrd_grid_df = pd.read_pickle(self.zams_grid_path)
sfrd_grid_df = sfrd_grid_df[['Redshift_ZAMS', 'FeH', 'Redshift_Bin_Edges', 'ZOH_Bin_Edges']]
for redshift in sfrd_grid_df.Redshift_ZAMS.unique():
df = sfrd_grid_df[sfrd_grid_df.Redshift_ZAMS == redshift]
sample_redshift_i = np.argmin(np.abs(self.sample_redshift_arr - redshift))
sample_redshift = self.sample_redshift_arr[sample_redshift_i]
sample_fehs = self.sample_feh_arr[sample_redshift_i]
for feh in df.FeH:
row = df[df.FeH == feh].iloc[0]
sample_feh_i = np.argmin(np.abs(sample_fehs - feh))
sample_feh = sample_fehs[sample_feh_i]
if sample_redshift not in self.sample_redshift_feh_bins_dict:
self.sample_redshift_feh_bins_dict[sample_redshift] = dict()
self.sample_redshift_feh_bins_dict[sample_redshift]['redshift_bin_edges'] = row.Redshift_Bin_Edges
redshift_dict = self.sample_redshift_feh_bins_dict[sample_redshift]
feh_bin_edges = (ZOH_to_FeH(row.ZOH_Bin_Edges[0]), ZOH_to_FeH(row.ZOH_Bin_Edges[1]))
redshift_dict[sample_feh] = feh_bin_edges
min_sfrd_redshift = min(list(self.sample_redshift_feh_bins_dict.keys()))
if self._get_redshift_from_age(self._physical_age_bin_centers[[-1]])[0] < min_sfrd_redshift:
new_limit = cosmo.age(min_sfrd_redshift).value
self._physical_age_bin_centers[-1] = new_limit
def _set_dz_dfeh_dage_mrates_arr(self):
"""Get the volumetric merger rate per z_zams and [Fe/H] bin, in yr-1 Gpc-3."""
self._dz_dfeh_dage_mrates_arr = np.zeros((*self.sample_feh_arr.shape, self._full_age_bin_centers.shape[0]),
np.float32)
for i_redshift_zams, redshift_zams in enumerate(self.sample_redshift_arr):
# self._dz_dfeh_mrates_dict[redshift_zams] = dict()
for i_feh, feh in enumerate(self.sample_feh_arr[i_redshift_zams]):
starforming_mass = self.sample_starforming_mass_dict[redshift_zams][feh] # (Mo)
dz_dfeh_merger_df = self.merger_df[(self.merger_df.Redshift_ZAMS == redshift_zams) &
(self.merger_df.FeH == feh)]
dz_dfeh_merger_age_arr = np.sort(dz_dfeh_merger_df.Age_Merger.to_numpy()) # (Gyr)
age_bin_heights = self._get_bin_frequency_heights(x_arr=dz_dfeh_merger_age_arr, # dN/dt (Gyr-1)
x_bin_edges=self._full_age_bin_edges)
age_bin_heights /= starforming_mass # dN/dt dMsf (Gyr-1 Mo-1)
age_bin_heights *= 1e9 # dN/dt dMsf (yr-1 Mo-1)
                log_sfrd = self.sfrd.get_logsfrd(feh, redshift_zams)  # log10(dMsf/dVc dz_zams dFeH) (log10(Mo Mpc-3))
                age_bin_sfmass_density = 10 ** log_sfrd / 1e9  # dMsf/dVc dz_zams dFeH (Mo Gpc-3)
if self._convert_time:
# convert time from the source (in which the SFR is measured) to the observer frame (in which the
# merger rate is measured), dt_s/dt_o = 1 / 1+z, and SFRD = dM_sf/dt_s dVc
age_bin_heights /= 1 + redshift_zams
#age_bin_sfmass_densities = sfrd * self._full_age_bin_widths # dMsf/dVc dz_zams dFeH (Mo Gpc-3)
age_bin_heights *= age_bin_sfmass_density # dN/dt dVc dz_zams dFeH (yr-1 Gpc-3)
if not self._new_method:
delta_z_zams = np.abs(self.sample_redshift_feh_bins_dict[redshift_zams]['redshift_bin_edges'][0] -
self.sample_redshift_feh_bins_dict[redshift_zams]['redshift_bin_edges'][1])
delta_feh = np.abs(self.sample_redshift_feh_bins_dict[redshift_zams][feh][0] -
self.sample_redshift_feh_bins_dict[redshift_zams][feh][1])
age_bin_heights /= delta_z_zams * delta_feh # dN/dt dVc delta_z_zams delta FeH (yr-1 Gpc-3)
# self._dz_dfeh_mrates_dict[redshift_zams][feh] = age_bin_heights
self._dz_dfeh_dage_mrates_arr[i_redshift_zams, i_feh] = age_bin_heights
def _get_feh_fits(self):
# We reorder the array columns to the order we must access them in for fitting over [Fe/H].
dage_dz_dfeh_mrates_arr = np.transpose(self._dz_dfeh_dage_mrates_arr, (2, 0, 1))
max_fehs_per_redshift = max([arr.shape[0] for arr in self.sample_feh_arr])
per_age_redshift_feh_fits = np.zeros((self._full_age_bin_widths.shape[0],
self.sample_redshift_arr.shape[0],
max_fehs_per_redshift - 1 + 2,
4),
np.float32)
for i_age, dz_dfeh_mrates_arr in enumerate(dage_dz_dfeh_mrates_arr):
for i_redshift, dfeh_mrates_arr in enumerate(dz_dfeh_mrates_arr):
feh_mrate0 = [self.min_feh,
0.0]
for i_feh, mrate1 in enumerate(dfeh_mrates_arr):
feh_mrate1 = [self.sample_feh_arr[i_redshift, i_feh],
mrate1]
per_age_redshift_feh_fits[i_age, i_redshift, i_feh] = self._get_linear_fit(feh_mrate0, feh_mrate1)
feh_mrate0 = feh_mrate1
feh_mrate1 = [self.max_feh,
0.0]
per_age_redshift_feh_fits[i_age, i_redshift, i_feh+1] = self._get_linear_fit(feh_mrate0, feh_mrate1)
return per_age_redshift_feh_fits
def _get_boundary_fit_limit(self, fit, variable, boundary):
x0, x1, slope, intercept = fit
if variable == 'feh':
min_var = self.min_feh
max_var = self.max_feh
elif variable == 'redshift':
min_var = self.min_redshift
max_var = self.max_redshift
else:
raise ValueError(f'Variable should be "feh" or "redshift", but {variable} was passed.')
if slope != 0:
x_intercept = -intercept / slope
else:
x_intercept = None
if boundary == 'lower':
if x_intercept is None or slope < 0:
var_mrate0 = (min_var, 0)
var_mrate1 = (x0, slope * x0 + intercept)
boundary_fit = self._get_linear_fit(var_mrate0, var_mrate1)
fixed_fit_edge = x0
else:
boundary_fit = (0, 0, 0, 0)
fixed_fit_edge = max([min_var, x_intercept])
elif boundary == 'upper':
if x_intercept is None or slope > 0:
var_mrate0 = (x1, slope * x1 + intercept)
var_mrate1 = (max_var, 0)
boundary_fit = self._get_linear_fit(var_mrate0, var_mrate1)
fixed_fit_edge = x1
else:
boundary_fit = (0, 0, 0, 0)
fixed_fit_edge = min([max_var, x_intercept])
else:
raise ValueError(f'Boundary parameter must be one of ["lower","upper"], not {boundary}.')
return boundary_fit, fixed_fit_edge
def _set_dz_dage_mrates_arr(self):
"""Get the volumetric merger rate per z_zams bin, integrated over [Fe/H], in yr-1 Gpc-3."""
self._per_age_redshift_feh_fits = self._get_feh_fits()
self._dz_dage_mrates_arr = np.zeros((self._per_age_redshift_feh_fits.shape[1],
self._per_age_redshift_feh_fits.shape[0]),
np.float32)
for i_age, per_redshift_feh_fits in enumerate(self._per_age_redshift_feh_fits):
for i_redshift, per_feh_fits in enumerate(per_redshift_feh_fits):
per_redshift_age_mrate = 0
for i_feh, fit in enumerate(per_feh_fits):
x0, x1, slope, intercept = fit
if i_feh == 0:
boundary_fit, x0 = self._get_boundary_fit_limit(fit, boundary='lower', variable='feh')
elif i_feh == per_feh_fits.shape[0] - 1:
boundary_fit, x1 = self._get_boundary_fit_limit(fit, boundary='upper', variable='feh')
else:
boundary_fit = np.zeros(4)
mrate = self._get_linear_fit_area((x0, x1, slope, intercept))
boundary_mrate = self._get_linear_fit_area(boundary_fit)
per_redshift_age_mrate += mrate + boundary_mrate
self._dz_dage_mrates_arr[i_redshift, i_age] = per_redshift_age_mrate
def _set_dz_dpopage_mrates_arr(self):
self._dz_dpopage_mrates_arr = np.zeros(self._dz_dage_mrates_arr.shape, np.float32)
for i_redshift, dage_mrates_arr in enumerate(self._dz_dage_mrates_arr):
start_i = next((i for i, x in enumerate(dage_mrates_arr) if x), -1)
nonzero_dage_mrates_arr = dage_mrates_arr[start_i:]
dpopage_mrates_arr = np.concatenate((
nonzero_dage_mrates_arr,
np.zeros(self._full_age_bin_centers.shape[0]-nonzero_dage_mrates_arr.shape[0], np.float32)
))
self._dz_dpopage_mrates_arr[i_redshift] = dpopage_mrates_arr
def _set_ip_dz_dpopage_mrates_arr(self):
self.ip_redshift_arr = self._get_redshift_from_age(self._physical_age_bin_centers)
#self.ip_redshift_arr = np.array([self._get_redshift_from_age(age) for age in self._physical_age_bin_centers])
self._ip_dz_dpopage_mrates_arr = np.zeros([self.ip_redshift_arr.shape[0], self._dz_dpopage_mrates_arr.shape[1]])
dpopage_dz_mrates_arr = self._dz_dpopage_mrates_arr.T
ip_dpopage_dz_mrates_arr = self._ip_dz_dpopage_mrates_arr.T
for i_age, dz_mrates_arr in enumerate(dpopage_dz_mrates_arr):
interpolator = interp1d(self.sample_redshift_arr, dz_mrates_arr, kind='linear')
ip_dpopage_dz_mrates_arr[i_age] = interpolator(self.ip_redshift_arr)
self._ip_dz_dpopage_mrates_arr = ip_dpopage_dz_mrates_arr.T
def _set_ip_dz_dage_mrates_arr(self):
self._ip_dz_dage_mrates_arr = np.zeros(self._ip_dz_dpopage_mrates_arr.shape, np.float32)
for ip_redshift_i, dpopage_mrates_arr in enumerate(self._ip_dz_dpopage_mrates_arr):
redshift = self.ip_redshift_arr[ip_redshift_i]
first_physical_redshift_bin_edge_i = next((i for i, z in enumerate(self._full_redshift_bin_edges)
if z < redshift)) - 1
if first_physical_redshift_bin_edge_i == 0:
last_physical_age_bin_edge_i = None
else:
last_physical_age_bin_edge_i = -first_physical_redshift_bin_edge_i
dage_mrates_arr = dpopage_mrates_arr[:last_physical_age_bin_edge_i]
dage_mrates_arr = np.concatenate((np.zeros(self._full_age_bin_centers.shape[0] - dage_mrates_arr.shape[0]),
dage_mrates_arr))
self._ip_dz_dage_mrates_arr[ip_redshift_i] = dage_mrates_arr
def _get_z_fits(self):
ip_dage_dz_mrates_arr = self._ip_dz_dage_mrates_arr.T
per_age_redshift_fits = np.zeros((self._physical_age_bin_centers.shape[0],
self.ip_redshift_arr.shape[0] -1 + 2,
4),
np.float32)
for i_age, dz_mrates_arr in enumerate(ip_dage_dz_mrates_arr):
if i_age == self._physical_age_bin_centers.shape[0]:
break
z_mrate0 = [self.max_redshift,
0]
for i_z, mrate1 in enumerate(dz_mrates_arr):
if i_z == self.ip_redshift_arr.shape[0]:
break
z_mrate1 = [self.ip_redshift_arr[i_z],
mrate1]
per_age_redshift_fits[i_age, i_z] = self._get_linear_fit(z_mrate0, z_mrate1)
z_mrate0 = z_mrate1
z_mrate1 = [self.min_redshift,
0]
per_age_redshift_fits[i_age, i_z+1] = self._get_linear_fit(z_mrate0, z_mrate1)
return per_age_redshift_fits
def _set_dage_mrates_arr(self):
"Get the volumetric merger rate, integrated over z_zams and [Fe/H], in yr-1 Gpc-3."
self._per_age_redshift_fits = self._get_z_fits()
dage_mrates_arr = np.zeros((self._per_age_redshift_fits.shape[0]), np.float32)
for i_age, per_redshift_fits in enumerate(self._per_age_redshift_fits):
per_age_mrate = 0
for i_z, fit in enumerate(per_redshift_fits):
x1, x0, slope, intercept = fit
fit = (x0, x1, slope, intercept)
if i_z == 0:
boundary_fit, x1 = self._get_boundary_fit_limit(fit, boundary='upper', variable='redshift')
elif i_z == per_redshift_fits.shape[0] - 1:
boundary_fit, x0 = self._get_boundary_fit_limit(fit, boundary='lower', variable='redshift')
else:
boundary_fit = np.zeros(4)
mrate = self._get_linear_fit_area((x0, x1, slope, intercept))
boundary_mrate = self._get_linear_fit_area(boundary_fit)
per_age_mrate += mrate + boundary_mrate
dage_mrates_arr[i_age] = per_age_mrate
self.mrate_arr = dage_mrates_arr
def delete_sample_df_from_memory(self):
self.sample_df = None
gc.collect()